Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 31 additions & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ concurrency:
jobs:
ci:
runs-on: ubuntu-latest
timeout-minutes: 15
timeout-minutes: 25

steps:
- name: Checkout
Expand Down Expand Up @@ -54,6 +54,12 @@ jobs:
working-directory: webview-ui
run: npm ci

- name: Install Playwright Dependencies
id: install_playwright_deps
if: always() && steps.install_root.outcome == 'success'
run: npx playwright install --with-deps chromium
continue-on-error: true

# --- Quality Checks (blocking) ---

- name: Type Check
Expand All @@ -75,6 +81,13 @@ jobs:
run: npm run lint
continue-on-error: true

- name: Webview Tests
id: webview_test
if: always() && steps.install_webview.outcome == 'success'
working-directory: webview-ui
run: npm test
continue-on-error: true

- name: Format Check
id: format_check
if: always() && steps.install_root.outcome == 'success'
Expand All @@ -91,6 +104,12 @@ jobs:
cd webview-ui && npm run build
continue-on-error: true

- name: E2E Tests
id: e2e_test
if: always() && steps.build.outcome == 'success' && steps.install_playwright_deps.outcome == 'success'
run: npm run e2e
continue-on-error: true

# --- Advisory Checks (non-blocking) ---

- name: Audit Root Dependencies
Expand All @@ -115,11 +134,14 @@ jobs:
SETUP_NODE: ${{ steps.setup_node.outcome }}
INSTALL_ROOT: ${{ steps.install_root.outcome }}
INSTALL_WEBVIEW: ${{ steps.install_webview.outcome }}
INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }}
TYPE_CHECK: ${{ steps.type_check.outcome }}
ROOT_LINT: ${{ steps.root_lint.outcome }}
WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
FORMAT_CHECK: ${{ steps.format_check.outcome }}
BUILD: ${{ steps.build.outcome }}
E2E_TEST: ${{ steps.e2e_test.outcome }}
AUDIT_ROOT: ${{ steps.audit_root.outcome }}
AUDIT_WEBVIEW: ${{ steps.audit_webview.outcome }}
run: |
Expand All @@ -135,11 +157,14 @@ jobs:
echo "| Setup Node | $(status "$SETUP_NODE") |"
echo "| Install root deps | $(status "$INSTALL_ROOT") |"
echo "| Install webview deps | $(status "$INSTALL_WEBVIEW") |"
echo "| Install Playwright deps | $(status "$INSTALL_PLAYWRIGHT_DEPS") |"
echo "| **Type check** | $(status "$TYPE_CHECK") |"
echo "| **Root lint** | $(status "$ROOT_LINT") |"
echo "| **Webview lint** | $(status "$WEBVIEW_LINT") |"
echo "| **Webview tests** | $(status "$WEBVIEW_TEST") |"
echo "| **Format check** | $(status "$FORMAT_CHECK") |"
echo "| **Build** | $(status "$BUILD") |"
echo "| **E2E tests** | $(status "$E2E_TEST") |"
echo "| Audit root _(advisory)_ | $(status "$AUDIT_ROOT") |"
echo "| Audit webview _(advisory)_ | $(status "$AUDIT_WEBVIEW") |"
} >> "$GITHUB_STEP_SUMMARY"
Expand All @@ -153,16 +178,19 @@ jobs:
SETUP_NODE: ${{ steps.setup_node.outcome }}
INSTALL_ROOT: ${{ steps.install_root.outcome }}
INSTALL_WEBVIEW: ${{ steps.install_webview.outcome }}
INSTALL_PLAYWRIGHT_DEPS: ${{ steps.install_playwright_deps.outcome }}
TYPE_CHECK: ${{ steps.type_check.outcome }}
ROOT_LINT: ${{ steps.root_lint.outcome }}
WEBVIEW_LINT: ${{ steps.webview_lint.outcome }}
WEBVIEW_TEST: ${{ steps.webview_test.outcome }}
FORMAT_CHECK: ${{ steps.format_check.outcome }}
BUILD: ${{ steps.build.outcome }}
E2E_TEST: ${{ steps.e2e_test.outcome }}
run: |
failed=0
for step in CHECKOUT SETUP_NODE INSTALL_ROOT INSTALL_WEBVIEW \
TYPE_CHECK ROOT_LINT WEBVIEW_LINT FORMAT_CHECK \
BUILD; do
INSTALL_PLAYWRIGHT_DEPS TYPE_CHECK ROOT_LINT WEBVIEW_LINT \
WEBVIEW_TEST FORMAT_CHECK BUILD E2E_TEST; do
eval "val=\$$step"
if [ "$val" != "success" ]; then
echo "::error::$step failed"
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ Thumbs.db
.vscode-test/
/.idea

# E2E test artifacts
test-results/
playwright-report/

# Build artifacts
*.vsix
*.map
Expand Down
46 changes: 46 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,52 @@ These conventions are enforced by custom ESLint rules (`eslint-rules/pixel-agent

These rules are set to `warn` — they won't block your PR but will flag violations for cleanup.

## End-to-End Tests

The `e2e/` directory contains Playwright tests that launch a real VS Code instance with the extension loaded in development mode.

### Running e2e tests locally

```bash
# Build the extension first (tests load the compiled output)
npm run build

# Headless (default — uses xvfb-run on Linux)
npm run e2e

# Headed (shows the VS Code window)
npm run e2e:headed

# Step-by-step debug mode
npm run e2e:debug
```

On the first run, `@vscode/test-electron` will download a stable VS Code release into `.vscode-test/` (≈200 MB). Subsequent runs reuse the cache.

### Artifacts

All test artifacts are written to `test-results/e2e/`:

| Path | Contents |
|---|---|
| `test-results/e2e/videos/<test-name>/` | `.webm` screen recording for every test |
| `test-results/e2e/html/` | Playwright HTML report (`npx playwright show-report test-results/e2e/html`) |
| `test-results/e2e/*.png` | Final screenshots saved on failure |

On failure, the test output prints the path to the video for that run.

### Mock claude

Tests never invoke the real `claude` CLI. Instead, the bash script at `e2e/fixtures/mock-claude` is copied into an isolated `bin/` directory as `claude`, and that directory is prepended to `PATH` before VS Code starts.

The mock:
1. Parses `--session-id <uuid>` from its arguments.
2. Appends a line to `$HOME/.claude-mock/invocations.log` so tests can assert it was called.
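3. Creates `$HOME/.claude/projects/<project-hash>/<session-id>.jsonl` with a minimal init line so the extension's file-watcher can detect the session. `<project-hash>` is the mock's working directory with every character outside `[a-zA-Z0-9-]` replaced by `-`.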
4. Sleeps for 30 s (keeps the terminal alive) then exits.

Each test runs with an isolated `HOME` and `--user-data-dir`, so no test state leaks between runs or into your real VS Code profile.

## Submitting a Pull Request

1. Fork the repo and create a feature branch from `main`
Expand Down
47 changes: 47 additions & 0 deletions e2e/fixtures/mock-claude
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Mock 'claude' executable for Pixel Agents e2e tests.
#
# Behaviour:
#   1. Parses --session-id <id> from args (if given more than once, the last wins).
#   2. Appends an invocation record to $HOME/.claude-mock/invocations.log.
#   3. Creates the expected JSONL file under $HOME/.claude/projects/<dir>/<id>.jsonl
#      where <dir> is the current working directory with every character outside
#      [a-zA-Z0-9-] replaced by '-' (one '-' per character, matching the
#      replace(/[^a-zA-Z0-9-]/g, '-') call in agentManager.ts).
#   4. Writes a minimal JSONL line so the extension file-watcher can proceed.
#   5. Stays alive for up to 30 s (tests can kill it once assertions pass).

set -euo pipefail

# --- Argument parsing: remember the value that follows --session-id ---
SESSION_ID=""
PREV=""
for arg in "$@"; do
  if [ "$PREV" = "--session-id" ]; then
    SESSION_ID="$arg"
  fi
  PREV="$arg"
done

# --- Invocation log, so tests can assert the mock was called ---
LOG_DIR="${HOME}/.claude-mock"
mkdir -p "$LOG_DIR"
echo "$(date -Iseconds) session-id=${SESSION_ID} cwd=$(pwd) args=$*" >> "${LOG_DIR}/invocations.log"

# --- Session file, only when a session id was supplied ---
if [ -n "$SESSION_ID" ]; then
  CWD="$(pwd)"
  # Replicate agentManager.ts: workspacePath.replace(/[^a-zA-Z0-9-]/g, '-')
  # Deliberately no `-s`: squeezing would collapse runs of '-' (e.g. "/." -> "-"
  # instead of "--") and produce a different directory than the extension expects.
  # NOTE: tr works on bytes, so paths with multi-byte characters may still differ.
  DIR_NAME="$(printf '%s' "$CWD" | tr -c 'a-zA-Z0-9-' '-')"
  PROJECT_DIR="${HOME}/.claude/projects/${DIR_NAME}"
  mkdir -p "$PROJECT_DIR"
  JSONL_FILE="${PROJECT_DIR}/${SESSION_ID}.jsonl"

  # Write a minimal system init line so the extension watcher sees the file.
  printf '{"type":"system","subtype":"init","content":"mock-claude-ready"}\n' >> "$JSONL_FILE"
fi

# Clean exit on SIGTERM/SIGINT. The trap is installed before the sleep starts so
# a signal arriving in between is still handled; `|| true` keeps a failed kill
# from overriding the intended exit status of 0 under `set -e`.
SLEEP_PID=""
trap 'if [ -n "$SLEEP_PID" ]; then kill "$SLEEP_PID" 2>/dev/null || true; fi; exit 0' SIGTERM SIGINT

# Stay alive so the VS Code terminal doesn't immediately close.
sleep 30 &
SLEEP_PID=$!

wait "$SLEEP_PID" || true
18 changes: 18 additions & 0 deletions e2e/global-setup.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import { downloadAndUnzipVSCode } from '@vscode/test-electron';
import fs from 'fs';
import path from 'path';

/** Cache directory passed to `downloadAndUnzipVSCode`: `.vscode-test/` one level above this file's directory. */
export const VSCODE_CACHE_DIR = path.join(__dirname, '../.vscode-test');
/** Text file inside the cache directory that holds the path of the VS Code executable. */
export const VSCODE_PATH_FILE = path.join(VSCODE_CACHE_DIR, 'vscode-executable.txt');

/**
 * Make sure a stable VS Code build is available in the cache directory and
 * record the location of its executable in `VSCODE_PATH_FILE`.
 *
 * NOTE(review): presumably registered as Playwright's `globalSetup` hook —
 * confirm against the Playwright config.
 */
export default async function globalSetup(): Promise<void> {
  console.log('[e2e] Ensuring VS Code is downloaded...');

  // Resolves to the executable path; the cache directory is reused between runs.
  const executablePath = await downloadAndUnzipVSCode({
    cachePath: VSCODE_CACHE_DIR,
    version: 'stable',
  });
  console.log('[e2e] VS Code executable: ' + executablePath);

  // Persist the path so other modules can read it from disk later.
  fs.mkdirSync(VSCODE_CACHE_DIR, { recursive: true });
  fs.writeFileSync(VSCODE_PATH_FILE, executablePath, { encoding: 'utf8' });
}
132 changes: 132 additions & 0 deletions e2e/helpers/launch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
import { _electron as electron } from '@playwright/test';
import type { ElectronApplication, Page } from '@playwright/test';
import fs from 'fs';
import os from 'os';
import path from 'path';

// Repository root: two directory levels above this file's directory.
const REPO_ROOT = path.join(__dirname, '../..');
// File holding the VS Code executable path (the same location e2e/global-setup.ts writes to).
const VSCODE_PATH_FILE = path.join(REPO_ROOT, '.vscode-test/vscode-executable.txt');
// Bash fixture that stands in for the real `claude` CLI during tests.
const MOCK_CLAUDE_PATH = path.join(REPO_ROOT, 'e2e/fixtures/mock-claude');
// Root directory for e2e artifacts; per-test video directories are created beneath it.
const ARTIFACTS_DIR = path.join(REPO_ROOT, 'test-results/e2e');

/** Handles and paths for one launched VS Code instance, as returned by `launchVSCode`. */
export interface VSCodeSession {
/** The Electron application returned by `electron.launch`. */
app: ElectronApplication;
/** The first window reported by the application. */
window: Page;
/** Isolated HOME directory for this test session. */
tmpHome: string;
/** Workspace directory opened in VS Code. */
workspaceDir: string;
/** Path to the mock invocations log. */
mockLogFile: string;
/** Closes the app (if launched) and removes the session's temp directory; errors from either step are ignored. */
cleanup: () => Promise<void>;
}

/**
 * Launch VS Code with the Pixel Agents extension loaded in development mode.
 *
 * Uses an isolated temp HOME and injects the mock `claude` binary at the
 * front of PATH so no real Claude CLI is needed.
 *
 * @param testTitle - Title of the calling test; sanitised (non-alphanumeric
 *   runs become `-`, lower-cased) to name the per-test video directory.
 * @returns The launched app, its first window, the isolated paths, and a
 *   `cleanup` callback that closes the app and deletes the temp directory.
 * @throws If the recorded VS Code executable path cannot be read or is empty,
 *   or if any setup or launch step fails. On failure after the temp directory
 *   was created, it is removed before the error is rethrown.
 */
export async function launchVSCode(testTitle: string): Promise<VSCodeSession> {
  const vscodePath = fs.readFileSync(VSCODE_PATH_FILE, 'utf8').trim();
  if (vscodePath === '') {
    throw new Error(
      `[e2e] ${VSCODE_PATH_FILE} is empty; expected the path to the VS Code executable`,
    );
  }

  // --- Isolated temp directories ---
  const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'pixel-e2e-'));

  // Declared before `cleanup` so the closure never refers to a binding that is
  // introduced later in the function.
  let app: ElectronApplication | undefined;

  const cleanup = async (): Promise<void> => {
    try {
      if (app) {
        await app.close();
      }
    } catch {
      // ignore close errors
    }
    try {
      fs.rmSync(tmpBase, { recursive: true, force: true });
    } catch {
      // ignore cleanup errors
    }
  };

  // Everything after mkdtempSync runs inside the try so that a failure in any
  // setup step (mkdir, copy, chmod, launch, ...) still removes tmpBase.
  try {
    const tmpHome = path.join(tmpBase, 'home');
    const workspaceDir = path.join(tmpBase, 'workspace');
    const userDataDir = path.join(tmpBase, 'userdata');
    const mockBinDir = path.join(tmpBase, 'bin');

    for (const dir of [tmpHome, workspaceDir, userDataDir, mockBinDir]) {
      fs.mkdirSync(dir, { recursive: true });
    }

    // Copy mock-claude into the isolated bin dir under the name 'claude'
    const mockDest = path.join(mockBinDir, 'claude');
    fs.copyFileSync(MOCK_CLAUDE_PATH, mockDest);
    fs.chmodSync(mockDest, 0o755);

    const mockLogFile = path.join(tmpHome, '.claude-mock', 'invocations.log');

    // --- Video output dir ---
    const safeTitle = testTitle.replace(/[^a-z0-9]+/gi, '-').toLowerCase();
    const videoDir = path.join(ARTIFACTS_DIR, 'videos', safeTitle);
    fs.mkdirSync(videoDir, { recursive: true });

    // --- Environment for VS Code process ---
    // Copy only defined variables instead of asserting the whole of
    // process.env to Record<string, string>.
    const env: Record<string, string> = {};
    for (const [key, value] of Object.entries(process.env)) {
      if (value !== undefined) {
        env[key] = value;
      }
    }
    env['HOME'] = tmpHome;
    // Prepend mock bin so 'claude' resolves to our mock; path.delimiter keeps
    // the separator correct for the host platform.
    env['PATH'] = [mockBinDir, process.env['PATH'] ?? '/usr/local/bin:/usr/bin:/bin'].join(
      path.delimiter,
    );
    // Prevent VS Code from trying to talk to real accounts / telemetry
    env['VSCODE_TELEMETRY_DISABLED'] = '1';

    // --- VS Code launch args ---
    const args = [
      // Load our extension in dev mode (this overrides the installed version)
      `--extensionDevelopmentPath=${REPO_ROOT}`,
      // Disable all other extensions so tests are isolated
      '--disable-extensions',
      // Isolated user-data (settings, state, etc.)
      `--user-data-dir=${userDataDir}`,
      // Skip interactive prompts
      '--disable-workspace-trust',
      '--skip-release-notes',
      '--skip-welcome',
      '--no-sandbox',
      // Open the workspace folder
      workspaceDir,
    ];

    app = await electron.launch({
      executablePath: vscodePath,
      args,
      env,
      cwd: workspaceDir,
      recordVideo: {
        dir: videoDir,
        size: { width: 1280, height: 800 },
      },
      timeout: 60_000,
    });

    // Electron can expose the window before the page lifecycle events settle.
    // The test waits for `.monaco-workbench`, so returning the window here is
    // more reliable than waiting on `domcontentloaded` in CI.
    const window = await app.firstWindow();

    return { app, window, tmpHome, workspaceDir, mockLogFile, cleanup };
  } catch (error) {
    await cleanup();
    throw error;
  }
}

/**
 * Block until the VS Code shell has rendered in the given window.
 *
 * Resolves once `waitForSelector` finds `.monaco-workbench`; the wait is
 * capped at 60 seconds.
 */
export async function waitForWorkbench(window: Page): Promise<void> {
  // VS Code renders a div.monaco-workbench when the shell is ready
  const workbenchSelector = '.monaco-workbench';
  const waitOptions = { timeout: 60_000 };
  await window.waitForSelector(workbenchSelector, waitOptions);
}
Loading
Loading