diff --git a/.changeset/fix-rpc-permission-gate-bypass.md b/.changeset/fix-rpc-permission-gate-bypass.md
new file mode 100644
index 0000000..5217d04
--- /dev/null
+++ b/.changeset/fix-rpc-permission-gate-bypass.md
@@ -0,0 +1,5 @@
+---
+"@aliou/pi-guardrails": patch
+---
+
+Fix permission gate bypass in RPC mode: when `ctx.ui.custom()` returns undefined, fall back to `ctx.ui.select()` and deny by default if no valid choice is returned.
diff --git a/AGENTS.md b/AGENTS.md
index 63005b9..405b01d 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -8,12 +8,15 @@ Pi is pre-1.0.0, so breaking changes can happen between Pi versions. This extens
 
 - TypeScript (strict mode)
 - pnpm 10.26.1
+- Vitest for testing
 - Biome for linting/formatting
 - Changesets for versioning
 
 ## Scripts
 
 ```bash
+pnpm test         # Run tests
+pnpm test:watch   # Run tests in watch mode
 pnpm typecheck    # Type check
 pnpm lint         # Lint (runs on pre-commit)
 pnpm format       # Format
@@ -31,10 +34,19 @@ src/
   components/         # UI components (pattern editor)
   lib/                # Vendored subagent executor core (Phase 1)
   utils/              # Helpers (matching, glob expansion, migration, shell AST)
+tests/
+  utils/              # Test harness utilities (adapted from pi-harness)
+    pi-context.ts     # Spy-based ExtensionContext / UI context builders
+    pi-test-harness.ts # Full extension loader with emitEvent() for hook testing
+    load-extension.ts # Wrapper for Pi internal extension loader
+    matchers.ts       # Custom vitest matchers (toHaveRegisteredTool, etc.)
 ```
 
 ## Conventions
 
+- Tests live next to the code they test (`src/hooks/foo.test.ts`)
+- Hook tests use `setupXxxHook()` directly with a mock `pi` and spy contexts from `tests/utils/pi-context.ts`, rather than loading the full extension (avoids `configLoader` side effects)
+- The full `createPiTestHarness()` is available for testing commands and tools that go through the extension factory
 - New hooks: follow patterns in `src/hooks/`
 - Built-in dangerous command matching uses AST parsing via `@aliou/sh`; user-configured patterns use substring/regex matching
 - File protection is policy-based (`features.policies`, `policies.rules`), not legacy `envFiles`
diff --git a/README.md b/README.md
index fbdf240..b6d825a 100644
--- a/README.md
+++ b/README.md
@@ -192,6 +192,16 @@ Also note:
 
 - `preventBrew`, `preventPython`, `enforcePackageManager`, `packageManager` were removed from guardrails and moved to `@aliou/pi-toolchain`.
 
+## Development
+
+```bash
+pnpm test         # Run tests
+pnpm test:watch   # Run tests in watch mode
+pnpm typecheck    # Type check
+pnpm lint         # Lint
+pnpm format       # Format
+```
+
 ## Events
 
 Guardrails emits events for other extensions:
diff --git a/src/hooks/permission-gate.test.ts b/src/hooks/permission-gate.test.ts
new file mode 100644
index 0000000..359d6ae
--- /dev/null
+++ b/src/hooks/permission-gate.test.ts
@@ -0,0 +1,332 @@
+import type {
+  BashToolCallEvent,
+  ExtensionAPI,
+  ExtensionContext,
+} from "@mariozechner/pi-coding-agent";
+import { createEventBus } from "@mariozechner/pi-coding-agent";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { createEventContext } from "../../tests/utils/pi-context";
+import type { ResolvedConfig } from "../config";
+import { configLoader } from "../config";
+import { setupPermissionGateHook } from "./permission-gate";
+
+// Mock configLoader so allow-session path doesn't throw.
+vi.mock("../config", async (importOriginal) => {
+  const original = (await importOriginal()) as Record<string, unknown>;
+  return {
+    ...original,
+    configLoader: {
+      getConfig: vi.fn(() => ({
+        permissionGate: { allowedPatterns: [] },
+      })),
+      save: vi.fn(async () => {}),
+    },
+  };
+});
+
+// ---------------------------------------------------------------------------
+// Constants — must match the production code's SELECT_* constants
+// ---------------------------------------------------------------------------
+
+const SELECT_ALLOW_ONCE = "Allow once";
+const SELECT_ALLOW_SESSION = "Allow for session";
+const SELECT_DENY = "Deny";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Config with only the permission gate feature on, built-in matchers enabled,
+ * and no custom patterns. `overrides` is spread last over `permissionGate`.
+ */
+function makeConfig(
+  overrides: Partial<ResolvedConfig["permissionGate"]> = {},
+): ResolvedConfig {
+  return {
+    version: "1",
+    enabled: true,
+    applyBuiltinDefaults: true,
+    features: { policies: false, permissionGate: true, pathAccess: false },
+    policies: { rules: [] },
+    pathAccess: { mode: "ask", allowedPaths: [] },
+    permissionGate: {
+      patterns: [],
+      useBuiltinMatchers: true,
+      requireConfirmation: true,
+      allowedPatterns: [],
+      autoDenyPatterns: [],
+      explainCommands: false,
+      explainModel: null,
+      explainTimeout: 5000,
+      ...overrides,
+    },
+  };
+}
+
+type ToolCallHandler = (
+  event: BashToolCallEvent,
+  ctx: ExtensionContext,
+) => Promise<{ block: true; reason: string } | undefined>;
+
+/**
+ * Create a mock ExtensionAPI whose `on()` records only `tool_call` handlers.
+ * `getHandler()` returns the first recorded handler, throwing if there is none.
+ */
+function createMockPi() {
+  const handlers: ToolCallHandler[] = [];
+  const eventBus = createEventBus();
+
+  const pi = {
+    on(event: string, handler: ToolCallHandler) {
+      if (event === "tool_call") {
+        handlers.push(handler);
+      }
+    },
+    events: eventBus,
+    // Stubs for any other ExtensionAPI methods that might be called.
+    registerCommand: vi.fn(),
+    registerTool: vi.fn(),
+    emit: vi.fn(),
+  } as unknown as ExtensionAPI;
+
+  return {
+    pi,
+    getHandler(): ToolCallHandler {
+      if (handlers.length === 0) {
+        throw new Error("No tool_call handler registered");
+      }
+      return handlers[0];
+    },
+  };
+}
+
+/**
+ * Wrap `command` in a bash `tool_call` event with a fixed tool call id.
+ */
+function bashEvent(command: string): BashToolCallEvent {
+  const input = { command };
+  const toolCallId = "tc_test";
+  return { type: "tool_call", toolCallId, toolName: "bash", input };
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+describe("permission gate", () => {
+  let handle: ReturnType<typeof createMockPi>;
+  let handler: ToolCallHandler;
+
+  beforeEach(() => {
+    handle = createMockPi();
+    setupPermissionGateHook(handle.pi, makeConfig());
+    handler = handle.getHandler();
+  });
+
+  it("allows safe commands", async () => {
+    const ctx = createEventContext({ hasUI: true });
+    const result = await handler(bashEvent("echo hello"), ctx);
+    expect(result).toBeUndefined();
+  });
+
+  it("blocks dangerous commands when user denies", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(async () => "deny") as ExtensionContext["ui"]["custom"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual({
+      block: true,
+      reason: "User denied dangerous command",
+    });
+  });
+
+  it("allows dangerous commands when user explicitly allows", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(async () => "allow") as ExtensionContext["ui"]["custom"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toBeUndefined();
+  });
+
+  it("blocks when hasUI is false (print/RPC mode)", async () => {
+    const ctx = createEventContext({ hasUI: false });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual(expect.objectContaining({ block: true }));
+  });
+
+  it("blocks when ctx.ui.custom() returns undefined (RPC stub)", async () => {
+    // Regression test for issue #19: in RPC mode, ctx.ui.custom() returns
+    // undefined. The gate used to check only for "deny", so undefined fell
+    // through and the command was silently allowed.
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual(expect.objectContaining({ block: true }));
+    expect(ctx.ui.select).toHaveBeenCalled();
+  });
+
+  it("blocks auto-deny patterns without prompting", async () => {
+    const { pi, getHandler } = createMockPi();
+    setupPermissionGateHook(
+      pi,
+      makeConfig({
+        autoDenyPatterns: [{ pattern: "DROP TABLE" }],
+      }),
+    );
+    const h = getHandler();
+    const ctx = createEventContext({ hasUI: true });
+    const result = await h(bashEvent("psql -c 'DROP TABLE users'"), ctx);
+    expect(result).toEqual(expect.objectContaining({ block: true }));
+    // Should not have prompted the user.
+    expect(ctx.ui.custom).not.toHaveBeenCalled();
+  });
+
+  it("skips allowed patterns", async () => {
+    const { pi, getHandler } = createMockPi();
+    setupPermissionGateHook(
+      pi,
+      makeConfig({
+        allowedPatterns: [{ pattern: "sudo echo" }],
+      }),
+    );
+    const h = getHandler();
+    const ctx = createEventContext({ hasUI: true });
+    const result = await h(bashEvent("sudo echo hello"), ctx);
+    expect(result).toBeUndefined();
+  });
+
+  // ---------------------------------------------------------------------------
+  // RPC mode: ctx.ui.select() fallback when ctx.ui.custom() returns undefined
+  // ---------------------------------------------------------------------------
+
+  it("falls back to select() when custom() returns undefined and allows on 'Allow once'", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => SELECT_ALLOW_ONCE,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toBeUndefined(); // not blocked → allowed
+    expect(ctx.ui.select).toHaveBeenCalled();
+  });
+
+  it("falls back to select() when custom() returns undefined and allows-session on 'Allow for session'", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => SELECT_ALLOW_SESSION,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toBeUndefined(); // not blocked → allowed with session grant
+    expect(ctx.ui.select).toHaveBeenCalled();
+  });
+
+  it("falls back to select() when custom() returns undefined and blocks on 'Deny'", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => SELECT_DENY,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual({
+      block: true,
+      reason: "User denied dangerous command",
+    });
+  });
+
+  it("blocks when both custom() and select() return undefined", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual(expect.objectContaining({ block: true }));
+    expect(ctx.ui.select).toHaveBeenCalled();
+  });
+
+  it("does not call select() when custom() returns a valid result", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(async () => "deny") as ExtensionContext["ui"]["custom"],
+      },
+    });
+    await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(ctx.ui.select).not.toHaveBeenCalled();
+  });
+
+  it("blocks when select() returns an unrecognized string", async () => {
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(async () => "maybe") as ExtensionContext["ui"]["select"],
+      },
+    });
+    const result = await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(result).toEqual(expect.objectContaining({ block: true }));
+  });
+
+  it("saves session grant via configLoader when select() returns 'Allow for session'", async () => {
+    // Reset save() history: earlier tests also trigger a session grant.
+    vi.mocked(configLoader.save).mockClear();
+    const ctx = createEventContext({
+      hasUI: true,
+      ui: {
+        custom: vi.fn(
+          async () => undefined,
+        ) as ExtensionContext["ui"]["custom"],
+        select: vi.fn(
+          async () => SELECT_ALLOW_SESSION,
+        ) as ExtensionContext["ui"]["select"],
+      },
+    });
+    await handler(bashEvent("sudo rm -rf /"), ctx);
+    expect(configLoader.save).toHaveBeenCalledWith("memory", {
+      permissionGate: { allowedPatterns: [{ pattern: "sudo rm -rf /" }] },
+    });
+  });
+});
diff --git a/src/hooks/permission-gate.ts b/src/hooks/permission-gate.ts
index 03ad1ad..8308e2e 100644
--- a/src/hooks/permission-gate.ts
+++ b/src/hooks/permission-gate.ts
@@ -580,10 +580,34 @@ export function setupPermissionGateHook(
 
       type ConfirmResult = "allow" | "allow-session" | "deny";
 
-      const result = await ctx.ui.custom<ConfirmResult>(
+      // Fallback select options for RPC mode (ctx.ui.custom is unimplemented).
+      const SELECT_ALLOW_ONCE = "Allow once";
+      const SELECT_ALLOW_SESSION = "Allow for session";
+      const SELECT_DENY = "Deny";
+      const SELECT_OPTIONS = [
+        SELECT_ALLOW_ONCE,
+        SELECT_ALLOW_SESSION,
+        SELECT_DENY,
+      ] as const;
+
+      let result = await ctx.ui.custom<ConfirmResult>(
         createPermissionGateConfirmComponent(command, description, explanation),
       );
 
+      // Fallback: ctx.ui.custom() returns undefined in RPC/headless mode
+      // (Pi's RPC runtime stubs it as `async custom() { return undefined; }`).
+      // Fall back to ctx.ui.select() which works over the RPC protocol.
+      // If select() also returns undefined/malformed, deny by default.
+      if (result === undefined) {
+        const selection = await ctx.ui.select(
+          `Dangerous command: ${description}`,
+          [...SELECT_OPTIONS],
+        );
+        if (selection === SELECT_ALLOW_ONCE) result = "allow";
+        else if (selection === SELECT_ALLOW_SESSION) result = "allow-session";
+        else result = "deny";
+      }
+
       if (result === "allow-session") {
         // Save command as allowed in memory scope (session-only).
         // Spread the resolved allowed patterns and append the new one.
diff --git a/tests/utils/load-extension.ts b/tests/utils/load-extension.ts
new file mode 100644
index 0000000..5256f9f
--- /dev/null
+++ b/tests/utils/load-extension.ts
@@ -0,0 +1,11 @@
+/**
+ * Wrapper around pi-coding-agent's internal `loadExtensionFromFactory`.
+ *
+ * This function is not part of the package's public API (the `exports` field
+ * only exposes "." and "./hooks"). We import the compiled JS directly by
+ * absolute path and re-export it from this single module so that only one
+ * place needs updating if the internal path changes upstream.
+ *
+ * Vitest resolves this via the `resolve.alias` entry in vitest.config.ts.
+ */
+export { loadExtensionFromFactory } from "#pi-internal/extensions-loader";
diff --git a/tests/utils/matchers.ts b/tests/utils/matchers.ts
new file mode 100644
index 0000000..a3d4960
--- /dev/null
+++ b/tests/utils/matchers.ts
@@ -0,0 +1,54 @@
+/**
+ * Custom vitest matchers for Pi extension test harness.
+ *
+ * These matchers inspect the real `Extension` object produced by the
+ * harness, not proxy-based mock state.
+ */
+
+import { expect } from "vitest";
+import type { PiTestHarness } from "./pi-test-harness";
+
+expect.extend({
+  toHaveRegisteredTool(received: unknown, name: string) {
+    const tools = (received as PiTestHarness).listRegisteredTools();
+    const found = tools.includes(name);
+    const describeMissing = () =>
+      `expected harness to have registered tool "${name}", registered: [${tools.join(", ")}]`;
+    return {
+      pass: found,
+      message: () =>
+        found
+          ? `expected harness not to have registered tool "${name}"`
+          : describeMissing(),
+      actual: tools,
+      expected: name,
+    };
+  },
+  toHaveRegisteredCommand(received: unknown, name: string) {
+    const commands = (received as PiTestHarness).listRegisteredCommands();
+    const found = commands.includes(name);
+    const describeMissing = () =>
+      `expected harness to have registered command "${name}", registered: [${commands.join(", ")}]`;
+    return {
+      pass: found,
+      message: () =>
+        found
+          ? `expected harness not to have registered command "${name}"`
+          : describeMissing(),
+      actual: commands,
+      expected: name,
+    };
+  },
+});
+
+declare module "vitest" {
+  interface Assertion<T> {
+    toHaveRegisteredTool(name: string): T;
+    toHaveRegisteredCommand(name: string): T;
+  }
+
+  interface AsymmetricMatchersContaining {
+    toHaveRegisteredTool(name: string): void;
+    toHaveRegisteredCommand(name: string): void;
+  }
+}
diff --git a/tests/utils/pi-context.ts b/tests/utils/pi-context.ts
new file mode 100644
index 0000000..2d25538
--- /dev/null
+++ b/tests/utils/pi-context.ts
@@ -0,0 +1,221 @@
+/**
+ * Explicit spy-based context builders for Pi extension tests.
+ *
+ * Every function property is a `vi.fn()` with a sensible default. This makes
+ * tests readable (you see exactly which properties exist) and keeps call
+ * tracking / override ergonomics that deep proxy mocks provide, without the
+ * hidden "any property access succeeds" footgun.
+ */
+
+import type {
+  ExtensionAPI,
+  ExtensionCommandContext,
+  ExtensionUIContext,
+  SessionManager,
+} from "@mariozechner/pi-coding-agent";
+import { vi } from "vitest";
+
+/**
+ * ReadonlySessionManager is not exported from pi-coding-agent's public API.
+ * We reconstruct the type here as a Pick of SessionManager.
+ */
+type ReadonlySessionManager = Pick<
+  SessionManager,
+  | "getCwd"
+  | "getSessionDir"
+  | "getSessionId"
+  | "getSessionFile"
+  | "getLeafId"
+  | "getLeafEntry"
+  | "getEntry"
+  | "getLabel"
+  | "getBranch"
+  | "getHeader"
+  | "getEntries"
+  | "getTree"
+  | "getSessionName"
+>;
+
+// ---------------------------------------------------------------------------
+// UI context
+// ---------------------------------------------------------------------------
+
+export type UIOverrides = Partial<ExtensionUIContext>;
+
+function createUIContext(overrides: UIOverrides = {}): ExtensionUIContext {
+  return {
+    select: vi.fn(async () => undefined),
+    confirm: vi.fn(async () => false),
+    input: vi.fn(async () => undefined),
+    notify: vi.fn(),
+    custom: vi.fn(async () => undefined),
+    onTerminalInput: vi.fn(() => () => {}),
+    setStatus: vi.fn(),
+    setWorkingMessage: vi.fn(),
+    setWidget: vi.fn(),
+    setFooter: vi.fn(),
+    setHeader: vi.fn(),
+    setTitle: vi.fn(),
+    pasteToEditor: vi.fn(),
+    setEditorText: vi.fn(),
+    getEditorText: vi.fn(() => ""),
+    editor: vi.fn(async () => undefined),
+    setEditorComponent: vi.fn(),
+    setToolsExpanded: vi.fn(),
+    ...overrides,
+  } as ExtensionUIContext;
+}
+
+// ---------------------------------------------------------------------------
+// Command context
+// ---------------------------------------------------------------------------
+
+export interface CommandContextOverrides {
+  cwd?: string;
+  hasUI?: boolean;
+  ui?: UIOverrides;
+  sessionManager?: ReadonlySessionManager;
+  modelRegistry?: ExtensionCommandContext["modelRegistry"];
+  model?: ExtensionCommandContext["model"];
+  isIdle?: () => boolean;
+  abort?: () => void;
+  hasPendingMessages?: () => boolean;
+  shutdown?: () => void;
+  getContextUsage?: () => undefined;
+  compact?: () => void;
+  getSystemPrompt?: () => string;
+  waitForIdle?: () => Promise<void>;
+  newSession?: ExtensionCommandContext["newSession"];
+  fork?: ExtensionCommandContext["fork"];
+  navigateTree?: ExtensionCommandContext["navigateTree"];
+  switchSession?: ExtensionCommandContext["switchSession"];
+  reload?: () => Promise<void>;
+}
+
+/**
+ * Build an `ExtensionCommandContext` with every method as a spy.
+ * Pass overrides for the properties your test cares about.
+ */
+export function createCommandContext(
+  overrides: CommandContextOverrides = {},
+): ExtensionCommandContext {
+  const ui = createUIContext(overrides.ui);
+
+  return {
+    cwd: overrides.cwd ?? process.cwd(),
+    hasUI: overrides.hasUI ?? true,
+    ui,
+    signal: undefined,
+    sessionManager: overrides.sessionManager ?? stubSessionManager(),
+    modelRegistry:
+      overrides.modelRegistry ??
+      ({} as ExtensionCommandContext["modelRegistry"]),
+    model: overrides.model ?? undefined,
+    isIdle: vi.fn(overrides.isIdle ?? (() => true)),
+    abort: vi.fn(overrides.abort ?? (() => {})),
+    hasPendingMessages: vi.fn(overrides.hasPendingMessages ?? (() => false)),
+    shutdown: vi.fn(overrides.shutdown ?? (() => {})),
+    getContextUsage: vi.fn(overrides.getContextUsage ?? (() => undefined)),
+    compact: vi.fn(overrides.compact ?? (() => {})),
+    getSystemPrompt: vi.fn(overrides.getSystemPrompt ?? (() => "")),
+    waitForIdle: vi.fn(overrides.waitForIdle ?? (async () => {})),
+    newSession: vi.fn(
+      overrides.newSession ?? (async () => ({ cancelled: false })),
+    ),
+    fork: vi.fn(overrides.fork ?? (async () => ({ cancelled: false }))),
+    navigateTree: vi.fn(
+      overrides.navigateTree ?? (async () => ({ cancelled: false })),
+    ),
+    switchSession: vi.fn(
+      overrides.switchSession ?? (async () => ({ cancelled: false })),
+    ),
+    reload: vi.fn(overrides.reload ?? (async () => {})),
+  } as ExtensionCommandContext;
+}
+
+// ---------------------------------------------------------------------------
+// Tool context
+// ---------------------------------------------------------------------------
+
+export interface ToolContextOverrides {
+  cwd?: string;
+}
+
+type ToolContext = NonNullable<
+  Parameters<Parameters<ExtensionAPI["registerTool"]>[0]["execute"]>[4]
+>;
+
+/**
+ * Build a minimal tool execution context: `cwd` (defaults to process.cwd())
+ * and an undefined `signal`.
+ */
+export function createToolContext(
+  overrides: ToolContextOverrides = {},
+): ToolContext {
+  const cwd = overrides.cwd ?? process.cwd();
+  const context = { cwd, signal: undefined };
+  return context as unknown as ToolContext;
+}
+
+// ---------------------------------------------------------------------------
+// Event context (for tool_call / session_start handlers)
+// ---------------------------------------------------------------------------
+
+export interface EventContextOverrides {
+  cwd?: string;
+  hasUI?: boolean;
+  ui?: UIOverrides;
+  sessionManager?: ReadonlySessionManager;
+}
+
+/**
+ * Build an `ExtensionContext` for event handlers (tool_call, session_start).
+ * Lighter than command context — no session control methods.
+ */
+export function createEventContext(overrides: EventContextOverrides = {}) {
+  const ui = createUIContext(overrides.ui);
+
+  return {
+    cwd: overrides.cwd ?? process.cwd(),
+    hasUI: overrides.hasUI ?? true,
+    ui,
+    signal: undefined,
+    sessionManager: overrides.sessionManager ?? stubSessionManager(),
+    modelRegistry: {} as ExtensionCommandContext["modelRegistry"],
+    model: undefined,
+    isIdle: vi.fn(() => true),
+    abort: vi.fn(),
+    hasPendingMessages: vi.fn(() => false),
+    shutdown: vi.fn(),
+    getContextUsage: vi.fn(() => undefined),
+    compact: vi.fn(),
+    getSystemPrompt: vi.fn(() => ""),
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+/**
+ * Minimal stub for ReadonlySessionManager when the test does not interact
+ * with session state at all. Every method is a vi.fn() returning a safe
+ * default.
+ */
+function stubSessionManager(): ReadonlySessionManager {
+  return {
+    getCwd: vi.fn(() => process.cwd()),
+    getSessionDir: vi.fn(() => ""),
+    getSessionId: vi.fn(() => "stub-session-id"),
+    getSessionFile: vi.fn(() => undefined),
+    getLeafId: vi.fn(() => null),
+    getLeafEntry: vi.fn(() => undefined),
+    getEntry: vi.fn(() => undefined),
+    getLabel: vi.fn(() => undefined),
+    getBranch: vi.fn(() => []),
+    getHeader: vi.fn(() => undefined),
+    getEntries: vi.fn(() => []),
+    getTree: vi.fn(() => []),
+    getSessionName: vi.fn(() => undefined),
+  } as unknown as ReadonlySessionManager;
+}
diff --git a/tests/utils/pi-internal.d.ts b/tests/utils/pi-internal.d.ts
new file mode 100644
index 0000000..31a9f1b
--- /dev/null
+++ b/tests/utils/pi-internal.d.ts
@@ -0,0 +1,21 @@
+/**
+ * Type declarations for the internal pi-coding-agent module aliased via
+ * vitest.config.ts. This mirrors the exports of
+ * `@mariozechner/pi-coding-agent/dist/core/extensions/loader.js`.
+ */
+declare module "#pi-internal/extensions-loader" {
+  import type {
+    EventBus,
+    Extension,
+    ExtensionFactory,
+    ExtensionRuntime,
+  } from "@mariozechner/pi-coding-agent";
+
+  export function loadExtensionFromFactory(
+    factory: ExtensionFactory,
+    cwd: string,
+    eventBus: EventBus,
+    runtime: ExtensionRuntime,
+    extensionPath?: string,
+  ): Promise<Extension>;
+}
diff --git a/tests/utils/pi-test-harness.ts b/tests/utils/pi-test-harness.ts
new file mode 100644
index 0000000..f4d9517
--- /dev/null
+++ b/tests/utils/pi-test-harness.ts
@@ -0,0 +1,230 @@
+/**
+ * Test harness that loads extension factories using real Pi internals.
+ *
+ * Instead of deep proxy mocks, this uses:
+ * - Real `createEventBus()` and `createExtensionRuntime()`
+ * - Real `loadExtensionFromFactory()` so extensions register through the
+ *   actual `ExtensionAPI` code path
+ * - Explicit vi.fn() spies for context objects (see pi-context.ts)
+ *
+ * The harness exposes the loaded `Extension` object so matchers and tests
+ * can inspect registered commands, tools, and event handlers directly.
+ *
+ * Context overrides (sessionManager, UI spies, etc.) are set at harness
+ * creation time and apply to every command execution. Per-call overrides
+ * passed to `execute()` merge on top when needed.
+ *
+ * A built-in `newSession` spy creates a real `SessionManager.inMemory()`
+ * for each child session and exposes it via `getChildSessionManager()`.
+ */
+
+import { mkdtempSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import type {
+  Extension,
+  ExtensionCommandContext,
+  ExtensionFactory,
+  ExtensionRuntime,
+  RegisteredCommand,
+  SessionManager,
+  ToolDefinition,
+} from "@mariozechner/pi-coding-agent";
+import {
+  createEventBus,
+  createExtensionRuntime,
+  SessionManager as SessionManagerClass,
+} from "@mariozechner/pi-coding-agent";
+import { vi } from "vitest";
+import { loadExtensionFromFactory } from "./load-extension";
+import {
+  type CommandContextOverrides,
+  createCommandContext,
+  createEventContext,
+  createToolContext,
+  type EventContextOverrides,
+} from "./pi-context";
+
+export interface PiTestHarness {
+  /** Working directory used by the harness. */
+  cwd: string;
+  /** The real Extension object produced by the factory. */
+  extension: Extension;
+  /**
+   * The shared ExtensionRuntime. Action methods (setModel, setThinkingLevel,
+   * etc.) are throwing stubs until replaced. Tests can assign vi.fn() spies
+   * directly to patch them before emitting events.
+   */
+  runtime: ExtensionRuntime;
+  /**
+   * Built-in `newSession` spy. When a command calls `ctx.newSession()`,
+   * this spy creates a real `SessionManager.inMemory()`, runs the
+   * `setup()` callback on it, and returns `{ cancelled: false }`.
+   */
+  newSession: ExtensionCommandContext["newSession"];
+  /**
+   * Returns the `SessionManager` that was created for the most recent
+   * child session (from the `newSession` spy), or `undefined` if no
+   * child session has been created yet.
+   */
+  getChildSessionManager(): SessionManager | undefined;
+  /** Look up a registered command by name and get an executor. */
+  command(name: string): CommandHandle;
+  /** Look up a registered tool by name and get an executor. */
+  tool(name: string): ToolHandle;
+  /**
+   * Emit an event directly to the extension's registered handlers.
+   * Returns the first non-undefined result (e.g., { block: true }).
+   */
+  emitEvent(
+    eventName: string,
+    event: unknown,
+    ctxOverrides?: EventContextOverrides,
+  ): Promise<unknown>;
+  /** All registered command names. */
+  listRegisteredCommands(): string[];
+  /** All registered tool names. */
+  listRegisteredTools(): string[];
+}
+
+export interface CommandHandle {
+  registered: RegisteredCommand;
+  execute(
+    args?: string,
+    overrides?: CommandContextOverrides,
+  ): Promise<ExtensionCommandContext>;
+}
+
+export interface ToolHandle {
+  /** The ToolDefinition that was registered (has execute, renderCall, etc). */
+  registered: ToolDefinition;
+  execute(params: Record<string, unknown>): Promise<unknown>;
+}
+
+export interface PiTestHarnessOptions {
+  cwd?: string;
+  extensionPath?: string;
+  /**
+   * Default context overrides applied to every command execution.
+   * Per-call overrides passed to `execute()` merge on top; the `ui` objects
+   * are merged key by key so harness-level and per-call spies coexist.
+   */
+  context?: CommandContextOverrides;
+}
+
+/**
+ * Create a test harness that loads an extension factory through real Pi
+ * internals. The returned harness lets you execute registered commands and
+ * tools with spy-based contexts.
+ *
+ * Context overrides set here become defaults for all command executions.
+ * The harness includes a built-in `newSession` spy that creates real
+ * in-memory session managers for child sessions. Access the most recent
+ * child via `getChildSessionManager()`.
+ */
+export async function createPiTestHarness(
+  factory: ExtensionFactory,
+  options: PiTestHarnessOptions = {},
+): Promise<PiTestHarness> {
+  const cwd = options.cwd ?? mkdtempSync(join(tmpdir(), "pi-test-cwd-"));
+  const harnessContext = options.context ?? {};
+  const eventBus = createEventBus();
+  const runtime = createExtensionRuntime();
+
+  const extension = await loadExtensionFromFactory(
+    factory,
+    cwd,
+    eventBus,
+    runtime,
+    options.extensionPath ?? "<test-extension>",
+  );
+
+  // Built-in newSession spy: creates a real child SessionManager and runs
+  // the setup callback, so tests can inspect entries written to the child.
+  let childSm: SessionManager | undefined;
+  const newSession = vi.fn(
+    async (opts?: Parameters<ExtensionCommandContext["newSession"]>[0]) => {
+      childSm = SessionManagerClass.inMemory();
+      if (opts?.setup) {
+        await opts.setup(childSm);
+      }
+      return { cancelled: false };
+    },
+  ) as unknown as ExtensionCommandContext["newSession"];
+
+  let toolCallCounter = 0;
+
+  function command(name: string): CommandHandle {
+    const registered = extension.commands.get(name);
+    if (!registered) {
+      const available = [...extension.commands.keys()].join(", ");
+      throw new Error(
+        `Command "${name}" is not registered. Registered: [${available}]`,
+      );
+    }
+    return {
+      registered,
+      async execute(
+        args = "",
+        overrides: CommandContextOverrides = {},
+      ): Promise<ExtensionCommandContext> {
+        const ctx = createCommandContext({
+          cwd,
+          newSession,
+          ...harnessContext,
+          ...overrides,
+          // Merge UI key by key so harness-level and per-call spies coexist.
+          ui: { ...harnessContext.ui, ...overrides.ui },
+        });
+        await registered.handler(args, ctx);
+        return ctx;
+      },
+    };
+  }
+
+  function tool(name: string): ToolHandle {
+    const entry = extension.tools.get(name);
+    if (!entry) {
+      const available = [...extension.tools.keys()].join(", ");
+      throw new Error(
+        `Tool "${name}" is not registered. Registered: [${available}]`,
+      );
+    }
+    const definition = entry.definition;
+    return {
+      registered: definition,
+      execute(params: Record<string, unknown>) {
+        const id = `tc_${++toolCallCounter}`;
+        const ctx = createToolContext({ cwd });
+        return definition.execute(id, params, undefined, undefined, ctx);
+      },
+    };
+  }
+
+  async function emitEvent(
+    eventName: string,
+    event: unknown,
+    ctxOverrides: EventContextOverrides = {},
+  ): Promise<unknown> {
+    const handlers = extension.handlers.get(eventName) ?? [];
+    const ctx = createEventContext({ cwd, ...ctxOverrides });
+    for (const handler of handlers) {
+      const result = await handler(event, ctx);
+      if (result !== undefined) return result;
+    }
+    return undefined;
+  }
+
+  return {
+    cwd,
+    extension,
+    runtime,
+    newSession,
+    getChildSessionManager: () => childSm,
+    command,
+    tool,
+    emitEvent,
+    listRegisteredCommands: () => [...extension.commands.keys()],
+    listRegisteredTools: () => [...extension.tools.keys()],
+  };
+}
diff --git a/tests/utils/theme.ts b/tests/utils/theme.ts
new file mode 100644
index 0000000..5b0850a
--- /dev/null
+++ b/tests/utils/theme.ts
@@ -0,0 +1,19 @@
+/**
+ * No-op theme for testing render functions. Every styling function returns
+ * the text unchanged, which is enough to exercise renderCall / renderResult
+ * without pulling in a real terminal theme.
+ */
+
+import type { Theme } from "@mariozechner/pi-coding-agent";
+
+const identity = (_color: string, text: string) => text; // ignores the color argument and returns the text as-is
+
+export const NOOP_THEME: Theme = {
+  fg: identity,
+  bg: identity,
+  bold: (t: string) => t,
+  italic: (t: string) => t,
+  underline: (t: string) => t,
+  strikethrough: (t: string) => t,
+  inverse: (t: string) => t,
+} as Theme; // NOTE(review): the assertion presumably covers Theme members not stubbed here — confirm none are used by the renderers under test
diff --git a/tests/utils/tmpdir.ts b/tests/utils/tmpdir.ts
new file mode 100644
index 0000000..fecf10a
--- /dev/null
+++ b/tests/utils/tmpdir.ts
@@ -0,0 +1,13 @@
+import { mkdtemp, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+import { test as baseTest } from "vitest";
+
+export const tmpdirTest = baseTest.extend<{ tmpdir: string }>({ // `test` variant that adds a `tmpdir` fixture holding a directory path
+  // biome-ignore lint/correctness/noEmptyPattern: Vitest fixture API requires destructuring first arg
+  tmpdir: async ({}, use) => {
+    const directory = await mkdtemp(join(tmpdir(), "vitest-")); // uniquely named directory under the OS temp dir, prefixed "vitest-"
+    await use(directory); // hand the path to the fixture consumer; code below runs after `use` resolves
+    await rm(directory, { recursive: true, force: true }); // remove the directory tree; `force` tolerates it already being gone
+  },
+});
diff --git a/tests/vitest.setup.ts b/tests/vitest.setup.ts
new file mode 100644
index 0000000..9f79ac0
--- /dev/null
+++ b/tests/vitest.setup.ts
@@ -0,0 +1 @@
+import "./utils/matchers";
diff --git a/tsconfig.json b/tsconfig.json
index 69a38f1..9831d3a 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -10,6 +10,6 @@
     "resolveJsonModule": true,
     "noEmit": true
   },
-  "include": ["src/**/*"],
+  "include": ["src/**/*", "tests/**/*", "vitest.config.ts"],
   "exclude": ["node_modules"]
 }
diff --git a/vitest.config.ts b/vitest.config.ts
index a7104d7..0a7fa63 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -1,9 +1,21 @@
+import { resolve } from "node:path";
 import { defineConfig } from "vitest/config";
 
 export default defineConfig({
+  resolve: {
+    alias: {
+      // Internal pi-coding-agent module not exposed via package "exports".
+      // Mapped here so tests can import it; the single wrapper in
+      // tests/utils/load-extension.ts is the only consumer.
+      "#pi-internal/extensions-loader": resolve( // NOTE(review): relative path, so it resolves against the process working directory — confirm vitest always runs from the repo root
+        "node_modules/@mariozechner/pi-coding-agent/dist/core/extensions/loader.js",
+      ),
+    },
+  },
   test: {
     environment: "node",
-    include: ["src/**/*.test.ts"],
+    include: ["src/**/*.test.ts", "tests/**/*.test.ts"], // also collect test files that live under tests/
+    setupFiles: ["./tests/vitest.setup.ts"], // that setup file imports ./utils/matchers
     mockReset: true,
   },
 });