diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 204d37ef..a7c27d38 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,7 @@ concurrency: jobs: ci: runs-on: ubuntu-latest - timeout-minutes: 15 + timeout-minutes: 25 steps: - name: Checkout @@ -54,6 +54,12 @@ jobs: working-directory: webview-ui run: npm ci + - name: Install Playwright Dependencies + id: install_playwright_deps + if: always() && steps.install_root.outcome == 'success' + run: npx playwright install --with-deps chromium + continue-on-error: true + # --- Quality Checks (blocking) --- - name: Type Check @@ -75,6 +81,13 @@ jobs: run: npm run lint continue-on-error: true + - name: Webview Tests + id: webview_test + if: always() && steps.install_webview.outcome == 'success' + working-directory: webview-ui + run: npm test + continue-on-error: true + - name: Format Check id: format_check if: always() && steps.install_root.outcome == 'success' @@ -91,6 +104,12 @@ jobs: cd webview-ui && npm run build continue-on-error: true + - name: E2E Tests + id: e2e_test + if: always() && steps.build.outcome == 'success' && steps.install_playwright_deps.outcome == 'success' + run: npm run e2e + continue-on-error: true + # --- Advisory Checks (non-blocking) --- - name: Audit Root Dependencies @@ -115,11 +134,14 @@ jobs: SETUP_NODE: ${{ steps.setup_node.outcome }} INSTALL_ROOT: ${{ steps.install_root.outcome }} INSTALL_WEBVIEW: ${{ steps.install_webview.outcome }} + INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }} TYPE_CHECK: ${{ steps.type_check.outcome }} ROOT_LINT: ${{ steps.root_lint.outcome }} WEBVIEW_LINT: ${{ steps.webview_lint.outcome }} + WEBVIEW_TEST: ${{ steps.webview_test.outcome }} FORMAT_CHECK: ${{ steps.format_check.outcome }} BUILD: ${{ steps.build.outcome }} + E2E_TEST: ${{ steps.e2e_test.outcome }} AUDIT_ROOT: ${{ steps.audit_root.outcome }} AUDIT_WEBVIEW: ${{ steps.audit_webview.outcome }} run: | @@ -135,11 +157,14 @@ jobs: echo "| 
Setup Node | $(status "$SETUP_NODE") |" echo "| Install root deps | $(status "$INSTALL_ROOT") |" echo "| Install webview deps | $(status "$INSTALL_WEBVIEW") |" + echo "| Install Playwright deps | $(status "$INSTALL_PLAYWRIGHT_DEPS") |" echo "| **Type check** | $(status "$TYPE_CHECK") |" echo "| **Root lint** | $(status "$ROOT_LINT") |" echo "| **Webview lint** | $(status "$WEBVIEW_LINT") |" + echo "| **Webview tests** | $(status "$WEBVIEW_TEST") |" echo "| **Format check** | $(status "$FORMAT_CHECK") |" echo "| **Build** | $(status "$BUILD") |" + echo "| **E2E tests** | $(status "$E2E_TEST") |" echo "| Audit root _(advisory)_ | $(status "$AUDIT_ROOT") |" echo "| Audit webview _(advisory)_ | $(status "$AUDIT_WEBVIEW") |" } >> "$GITHUB_STEP_SUMMARY" @@ -153,16 +178,19 @@ jobs: SETUP_NODE: ${{ steps.setup_node.outcome }} INSTALL_ROOT: ${{ steps.install_root.outcome }} INSTALL_WEBVIEW: ${{ steps.install_webview.outcome }} + INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }} TYPE_CHECK: ${{ steps.type_check.outcome }} ROOT_LINT: ${{ steps.root_lint.outcome }} WEBVIEW_LINT: ${{ steps.webview_lint.outcome }} + WEBVIEW_TEST: ${{ steps.webview_test.outcome }} FORMAT_CHECK: ${{ steps.format_check.outcome }} BUILD: ${{ steps.build.outcome }} + E2E_TEST: ${{ steps.e2e_test.outcome }} run: | failed=0 for step in CHECKOUT SETUP_NODE INSTALL_ROOT INSTALL_WEBVIEW \ - TYPE_CHECK ROOT_LINT WEBVIEW_LINT FORMAT_CHECK \ - BUILD; do + INSTALL_PLAYWRIGHT_DEPS TYPE_CHECK ROOT_LINT WEBVIEW_LINT \ + WEBVIEW_TEST FORMAT_CHECK BUILD E2E_TEST; do eval "val=\$$step" if [ "$val" != "success" ]; then echo "::error::$step failed" diff --git a/.gitignore b/.gitignore index 30ab455b..66b1f150 100644 --- a/.gitignore +++ b/.gitignore @@ -17,6 +17,10 @@ Thumbs.db .vscode-test/ /.idea +# E2E test artifacts +test-results/ +playwright-report/ + # Build artifacts *.vsix *.map diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e9c1487a..24e9e9bf 100644 --- a/CONTRIBUTING.md +++ 
b/CONTRIBUTING.md @@ -94,6 +94,52 @@ These conventions are enforced by custom ESLint rules (`eslint-rules/pixel-agent These rules are set to `warn` — they won't block your PR but will flag violations for cleanup. +## End-to-End Tests + +The `e2e/` directory contains Playwright tests that launch a real VS Code instance with the extension loaded in development mode. + +### Running e2e tests locally + +```bash +# Build the extension first (tests load the compiled output) +npm run build + +# Headless (default — uses xvfb-run on Linux) +npm run e2e + +# Headed (shows the VS Code window) +npm run e2e:headed + +# Step-by-step debug mode +npm run e2e:debug +``` + +On the first run, `@vscode/test-electron` will download a stable VS Code release into `.vscode-test/` (≈200 MB). Subsequent runs reuse the cache. + +### Artifacts + +All test artifacts are written to `test-results/e2e/`: + +| Path | Contents | +|---|---| +| `test-results/e2e/videos/<test-title>/` | `.webm` screen recording for every test | +| `test-results/e2e/html/` | Playwright HTML report (`npx playwright show-report test-results/e2e/html`) | +| `test-results/e2e/*.png` | Final screenshots saved on failure | + +On failure, the test output prints the path to the video for that run. + +### Mock claude + +Tests never invoke the real `claude` CLI. Instead, a bash script at `e2e/fixtures/mock-claude` is copied into an isolated `bin/` directory, and that directory is prepended to `PATH` before VS Code starts. + +The mock: +1. Parses `--session-id <id>` from its arguments. +2. Appends a line to `$HOME/.claude-mock/invocations.log` so tests can assert it was called. +3. Creates `$HOME/.claude/projects/<project-hash>/<session-id>.jsonl` with a minimal init line so the extension's file-watcher can detect the session. +4. Sleeps for 30 s (keeps the terminal alive) then exits. + +Each test runs with an isolated `HOME` and `--user-data-dir`, so no test state leaks between runs or into your real VS Code profile. + ## Submitting a Pull Request 1. 
Fork the repo and create a feature branch from `main` diff --git a/e2e/fixtures/mock-claude b/e2e/fixtures/mock-claude new file mode 100755 index 00000000..a1db3a99 --- /dev/null +++ b/e2e/fixtures/mock-claude @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Mock 'claude' executable for Pixel Agents e2e tests. +# +# Behaviour: +# 1. Parses --session-id <id> from args. +# 2. Appends an invocation record to $HOME/.claude-mock/invocations.log. +# 3. Creates the expected JSONL file under $HOME/.claude/projects/<project-hash>/<session-id>.jsonl +# using the same path-hash algorithm as agentManager.ts +# (replace every non-[a-zA-Z0-9-] char with '-'). +# 4. Writes a minimal valid JSONL line so the extension file-watcher can proceed. +# 5. Stays alive for up to 30 s (tests can kill it once assertions pass). + +set -euo pipefail + +SESSION_ID="" +PREV="" +for arg in "$@"; do + if [ "$PREV" = "--session-id" ]; then + SESSION_ID="$arg" + fi + PREV="$arg" +done + +LOG_DIR="${HOME}/.claude-mock" +mkdir -p "$LOG_DIR" +echo "$(date -Iseconds) session-id=${SESSION_ID} cwd=$(pwd) args=$*" >> "${LOG_DIR}/invocations.log" + +if [ -n "$SESSION_ID" ]; then + CWD="$(pwd)" + # Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-') — one '-' per character, so runs must NOT be squeezed (no tr -s) + DIR_NAME="$(printf '%s' "$CWD" | tr -c 'a-zA-Z0-9-' '-')" + PROJECT_DIR="${HOME}/.claude/projects/${DIR_NAME}" + mkdir -p "$PROJECT_DIR" + JSONL_FILE="${PROJECT_DIR}/${SESSION_ID}.jsonl" + + # Write a minimal system init line so the extension watcher sees the file. + printf '{"type":"system","subtype":"init","content":"mock-claude-ready"}\n' >> "$JSONL_FILE" +fi + +# Stay alive so the VS Code terminal doesn't immediately close. +sleep 30 & +SLEEP_PID=$! + +# Clean exit on SIGTERM/SIGINT. 
+trap 'kill $SLEEP_PID 2>/dev/null; exit 0' SIGTERM SIGINT + +wait $SLEEP_PID || true diff --git a/e2e/global-setup.ts b/e2e/global-setup.ts new file mode 100644 index 00000000..d71dcd7e --- /dev/null +++ b/e2e/global-setup.ts @@ -0,0 +1,18 @@ +import { downloadAndUnzipVSCode } from '@vscode/test-electron'; +import fs from 'fs'; +import path from 'path'; + +export const VSCODE_CACHE_DIR = path.join(__dirname, '../.vscode-test'); +export const VSCODE_PATH_FILE = path.join(VSCODE_CACHE_DIR, 'vscode-executable.txt'); + +export default async function globalSetup(): Promise<void> { + console.log('[e2e] Ensuring VS Code is downloaded...'); + const vscodePath = await downloadAndUnzipVSCode({ + version: 'stable', + cachePath: VSCODE_CACHE_DIR, + }); + console.log(`[e2e] VS Code executable: ${vscodePath}`); + + fs.mkdirSync(VSCODE_CACHE_DIR, { recursive: true }); + fs.writeFileSync(VSCODE_PATH_FILE, vscodePath, 'utf8'); +} diff --git a/e2e/helpers/launch.ts b/e2e/helpers/launch.ts new file mode 100644 index 00000000..7d649c6b --- /dev/null +++ b/e2e/helpers/launch.ts @@ -0,0 +1,132 @@ +import { _electron as electron } from '@playwright/test'; +import type { ElectronApplication, Page } from '@playwright/test'; +import fs from 'fs'; +import os from 'os'; +import path from 'path'; + +const REPO_ROOT = path.join(__dirname, '../..'); +const VSCODE_PATH_FILE = path.join(REPO_ROOT, '.vscode-test/vscode-executable.txt'); +const MOCK_CLAUDE_PATH = path.join(REPO_ROOT, 'e2e/fixtures/mock-claude'); +const ARTIFACTS_DIR = path.join(REPO_ROOT, 'test-results/e2e'); + +export interface VSCodeSession { + app: ElectronApplication; + window: Page; + /** Isolated HOME directory for this test session. */ + tmpHome: string; + /** Workspace directory opened in VS Code. */ + workspaceDir: string; + /** Path to the mock invocations log. */ + mockLogFile: string; + cleanup: () => Promise<void>; +} + +/** + * Launch VS Code with the Pixel Agents extension loaded in development mode. 
+ * + * Uses an isolated temp HOME and injects the mock `claude` binary at the + * front of PATH so no real Claude CLI is needed. + */ +export async function launchVSCode(testTitle: string): Promise<VSCodeSession> { + const vscodePath = fs.readFileSync(VSCODE_PATH_FILE, 'utf8').trim(); + + // --- Isolated temp directories --- + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'pixel-e2e-')); + const tmpHome = path.join(tmpBase, 'home'); + const workspaceDir = path.join(tmpBase, 'workspace'); + const userDataDir = path.join(tmpBase, 'userdata'); + const mockBinDir = path.join(tmpBase, 'bin'); + + fs.mkdirSync(tmpHome, { recursive: true }); + fs.mkdirSync(workspaceDir, { recursive: true }); + fs.mkdirSync(userDataDir, { recursive: true }); + fs.mkdirSync(mockBinDir, { recursive: true }); + + // Copy mock-claude into an isolated bin dir as an executable named 'claude' + const mockDest = path.join(mockBinDir, 'claude'); + fs.copyFileSync(MOCK_CLAUDE_PATH, mockDest); + fs.chmodSync(mockDest, 0o755); + + const mockLogFile = path.join(tmpHome, '.claude-mock', 'invocations.log'); + + // --- Video output dir --- + const safeTitle = testTitle.replace(/[^a-z0-9]+/gi, '-').toLowerCase(); + const videoDir = path.join(ARTIFACTS_DIR, 'videos', safeTitle); + fs.mkdirSync(videoDir, { recursive: true }); + + // --- Environment for VS Code process --- + const env: Record<string, string> = { + ...process.env as Record<string, string>, + HOME: tmpHome, + // Prepend mock bin so 'claude' resolves to our mock + PATH: `${mockBinDir}:${process.env['PATH'] ?? '/usr/local/bin:/usr/bin:/bin'}`, + // Prevent VS Code from trying to talk to real accounts / telemetry + VSCODE_TELEMETRY_DISABLED: '1', + }; + + // --- VS Code launch args --- + const args = [ + // Load our extension in dev mode (this overrides the installed version) + `--extensionDevelopmentPath=${REPO_ROOT}`, + // Disable all other extensions so tests are isolated + '--disable-extensions', + // Isolated user-data (settings, state, etc.) 
+ `--user-data-dir=${userDataDir}`, + // Skip interactive prompts + '--disable-workspace-trust', + '--skip-release-notes', + '--skip-welcome', + '--no-sandbox', + // Open the workspace folder + workspaceDir, + ]; + + const cleanup = async (): Promise<void> => { + try { + if (app) { + await app.close(); + } + } catch { + // ignore close errors + } + try { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch { + // ignore cleanup errors + } + }; + + let app: ElectronApplication | undefined; + + try { + app = await electron.launch({ + executablePath: vscodePath, + args, + env, + cwd: workspaceDir, + recordVideo: { + dir: videoDir, + size: { width: 1280, height: 800 }, + }, + timeout: 60_000, + }); + + // Electron can expose the window before the page lifecycle events settle. + // The test waits for `.monaco-workbench`, so returning the window here is + // more reliable than waiting on `domcontentloaded` in CI. + const window = await app.firstWindow(); + + return { app, window, tmpHome, workspaceDir, mockLogFile, cleanup }; + } catch (error) { + await cleanup(); + throw error; + } +} + +/** + * Wait for VS Code's workbench to be fully ready before interacting. 
+ */ +export async function waitForWorkbench(window: Page): Promise<void> { + // VS Code renders a div.monaco-workbench when the shell is ready + await window.waitForSelector('.monaco-workbench', { timeout: 60_000 }); +} diff --git a/e2e/helpers/webview.ts b/e2e/helpers/webview.ts new file mode 100644 index 00000000..a42d6f81 --- /dev/null +++ b/e2e/helpers/webview.ts @@ -0,0 +1,105 @@ +import type { Frame, Page } from '@playwright/test'; +import { expect } from '@playwright/test'; + +const WEBVIEW_TIMEOUT_MS = 30_000; +const PANEL_OPEN_TIMEOUT_MS = 15_000; +const MIN_PANEL_HEIGHT_PX = 320; + +async function runCommand(window: Page, command: string): Promise<void> { + await window.keyboard.press('F1'); + await window.waitForSelector('.quick-input-widget', { timeout: PANEL_OPEN_TIMEOUT_MS }); + await window.keyboard.type(command); + await window.waitForSelector('.quick-input-list .monaco-list-row', { + timeout: PANEL_OPEN_TIMEOUT_MS, + }); + await window.keyboard.press('Enter'); + await window.waitForSelector('.quick-input-widget', { + state: 'hidden', + timeout: PANEL_OPEN_TIMEOUT_MS, + }).catch(() => { + // Some commands update layout without immediately dismissing quick input. + }); +} + +async function getPanelHeight(window: Page): Promise<number> { + return window.evaluate(() => { + const panel = + document.querySelector('[id="workbench.panel.bottom"]') ?? + document.querySelector('.part.panel'); + + return Math.round(panel?.getBoundingClientRect().height ?? 
0); + }); +} + +async function ensurePanelIsLarge(window: Page): Promise<void> { + if ((await getPanelHeight(window)) > MIN_PANEL_HEIGHT_PX) { + return; + } + + await runCommand(window, 'View: Toggle Maximized Panel'); + + await expect + .poll(() => getPanelHeight(window), { + message: 'Expected the bottom panel to be resized for the Pixel Agents webview', + timeout: PANEL_OPEN_TIMEOUT_MS, + intervals: [250, 500, 1000], + }) + .toBeGreaterThan(MIN_PANEL_HEIGHT_PX); +} + +/** + * Open the Pixel Agents panel via the Command Palette and wait for the + * "Pixel Agents: Show Panel" command to execute. + */ +export async function openPixelAgentsPanel(window: Page): Promise<void> { + await runCommand(window, 'Pixel Agents: Show Panel'); + + // Wait for the panel container to appear + await window.waitForSelector('[id="workbench.panel.bottom"], .part.panel', { + timeout: PANEL_OPEN_TIMEOUT_MS, + }).catch(() => { + // Panel might not use this id; just continue + }); + + await ensurePanelIsLarge(window); +} + +/** + * Find and return the Pixel Agents webview frame. + * + * VS Code renders WebviewViewProvider content in an