Kanevry
diff --git a/‎.claude/rules/testing.md‎
Lines changed: 26 additions & 0 deletions b/‎.claude/rules/testing.md‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎CLAUDE.md‎
Lines changed: 6 additions & 4 deletions b/‎CLAUDE.md‎
Lines changed: 6 additions & 4 deletions
diff --git a/‎README.md‎
Lines changed: 2 additions & 2 deletions b/‎README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/test-runs/.gitkeep‎ b/‎docs/test-runs/.gitkeep‎
diff --git a/‎docs/test-runs/proof-aiat-pmo-module-2026-05-14.md‎
Lines changed: 116 additions & 0 deletions b/‎docs/test-runs/proof-aiat-pmo-module-2026-05-14.md‎
Lines changed: 116 additions & 0 deletions
diff --git a/‎scripts/lib/config/test.mjs‎
Lines changed: 12 additions & 1 deletion b/‎scripts/lib/config/test.mjs‎
Lines changed: 12 additions & 1 deletion
diff --git a/‎scripts/lib/playwright-driver/runner.mjs‎
Lines changed: 16 additions & 3 deletions b/‎scripts/lib/playwright-driver/runner.mjs‎
Lines changed: 16 additions & 3 deletions
diff --git a/‎…pts/lib/test-runner/profile-registry.mjs‎ ‎scripts/lib/shared/profiles/registry.mjs‎scripts/lib/test-runner/profile-registry.mjs renamed to scripts/lib/shared/profiles/registry.mjs
Lines changed: 2 additions & 2 deletions b/‎…pts/lib/test-runner/profile-registry.mjs‎ ‎scripts/lib/shared/profiles/registry.mjs‎scripts/lib/test-runner/profile-registry.mjs renamed to scripts/lib/shared/profiles/registry.mjs
Lines changed: 2 additions & 2 deletions
diff --git a/‎…ripts/lib/test-runner/profile-schema.mjs‎ ‎scripts/lib/shared/profiles/schema.mjs‎scripts/lib/test-runner/profile-schema.mjs renamed to scripts/lib/shared/profiles/schema.mjs
Lines changed: 8 additions & 1 deletion b/‎…ripts/lib/test-runner/profile-schema.mjs‎ ‎scripts/lib/shared/profiles/schema.mjs‎scripts/lib/test-runner/profile-schema.mjs renamed to scripts/lib/shared/profiles/schema.mjs
Lines changed: 8 additions & 1 deletion
diff --git a/‎scripts/lib/validate/check-peekaboo-driver-canary.mjs‎
Lines changed: 1 addition & 0 deletions b/‎scripts/lib/validate/check-peekaboo-driver-canary.mjs‎
Lines changed: 1 addition & 0 deletions
@@ -95,6 +95,32 @@ globs:
 - Coverage regex: `/All files[^|]*\|[^|]*\s+([\d\.]+)/` extracts percentage for MR badges.
 - Failed tests block merge. No exceptions.
 
+### Shared-Hardware Runner Contention (Mac shell executors)
+
+Shell-executor runners that share a host with an active Claude Code session can be CPU-starved when concurrent Claude processes climb past ~10. Symptom: vitest tests that pass locally in <2min hit `testTimeout` (default `10_000`) on the runner. **This is an operator/concurrency issue, not a test or code regression.** Do not treat it as a flaky-test problem and do not widen timeouts globally to paper over it.
+
+- **Cautionary tale:** Pipeline #3940 (2026-05-14 deep-1) failed with 7 `testTimeout` failures after 34 min total (test job 18.7 min, gitleaks 7m58s) on the GitLab Mac runner. The same commit and the same tests passed locally in <2 min (4897 passed in 11 s). Local re-run of the 7 failing tests: 90/90 green. Resource probe at session-start showed 14 Claude processes — well above the `concurrent-sessions-warn=5` threshold.
+- **Diagnostic signal:** if local `npm test` is green and CI fails only with `testTimeout` (not assertion failures), check the host's Claude-process count before re-running:
+  ```bash
+  pgrep -f 'claude' | wc -l # count of active Claude processes on this host (BSD/macOS pgrep has no -c flag)
+  ```
+  A count ≥10 against a shared shell-executor runner is the smoking gun.
+
+**Mitigations, in order of effort:**
+
+1. **Avoid concurrent sessions during CI runs (primary).** Do not start a new Claude Code session in this repo while a CI pipeline is in flight on the same host. The session-start resource-probe banner (threshold `concurrent-sessions-warn=5`) is the active signal — treat it as load-shedding guidance, not a passive note.
+2. **Raise the per-test vitest timeout only when contention is expected:**
+   ```ts
+   // vitest.config.ts — ceiling for a contended Mac runner
+   export default defineConfig({ test: { testTimeout: 30_000 } });
+   ```
+   Trade-off: real hangs take longer to surface. Do not push past `30_000` as a default.
+3. **Offload heavy CI to a dedicated runner** when the pattern becomes recurring — the resource probe is the trigger, not a single failed pipeline.
+
+What this is **NOT**: a test-quality bug. Do not retry, mark `.skip`, or widen timeout values on quiet runners to "stabilise" — that masks real perf regressions where they should be loudest.
+
+Cross-reference: learning id `mac-gitlab-runner-cpu-starvation-under-concurrent-claude-load` in `.orchestrator/metrics/learnings.jsonl` (confidence 0.9). `/evolve` rotates the rule if the signal stops applying.
+
 ## E2E Best Practices
 - Use data-testid attributes for stable selectors.
 - Avoid `page.waitForTimeout()` — use `page.waitForSelector()` or `expect().toBeVisible()`.
 
@@ -227,8 +227,8 @@ Active Cursor hooks: 2 events (`afterFileEdit`, `beforeShellExecution`) routed t
 
 ## Components
 
-- **31 Skills**: bootstrap, session-start, session-plan, wave-executor, session-end, claude-md-drift-check, ecosystem-health, gitlab-ops, quality-gates, discovery, plan, evolve, vault-sync, vault-mirror, daily, docs-orchestrator, skill-creator, mcp-builder, hook-development, architecture, domain-model, ubiquitous-language, autopilot, mode-selector, repo-audit, convergence-monitoring, using-orchestrator, frontmatter-guard, test-runner, playwright-driver, peekaboo-driver (session-start sub-files: `phase-2-5-docs-planning.md`, `phase-4-5-resource-health.md`, `phase-7-5-mode-selector.md`, `phase-8-5-express-path.md`)
-- **11 Commands**: `/session`, `/go`, `/close`, `/discovery`, `/plan`, `/evolve`, `/bootstrap`, `/harness-audit`, `/autopilot`, `/repo-audit`, `/test`
+- **32 Skills**: bootstrap, session-start, session-plan, wave-executor, session-end, claude-md-drift-check, ecosystem-health, gitlab-ops, quality-gates, discovery, plan, evolve, vault-sync, vault-mirror, daily, docs-orchestrator, skill-creator, mcp-builder, hook-development, architecture, domain-model, ubiquitous-language, autopilot, mode-selector, repo-audit, convergence-monitoring, using-orchestrator, frontmatter-guard, test-runner, playwright-driver, peekaboo-driver (session-start sub-files: `phase-2-5-docs-planning.md`, `phase-4-5-resource-health.md`, `phase-7-5-mode-selector.md`, `phase-8-5-express-path.md`)
+- **12 Commands**: `/session`, `/go`, `/close`, `/discovery`, `/plan`, `/evolve`, `/bootstrap`, `/harness-audit`, `/autopilot`, `/repo-audit`, `/test`
 - **11 Agents**: code-implementer, test-writer, ui-developer, db-specialist, security-reviewer, session-reviewer, docs-writer, architect-reviewer, qa-strategist, analyst, ux-evaluator
 - **10 hook event matchers / 10 handlers**: SessionStart (banner + init), PreToolUse/Edit\|Write (scope enforcement), PreToolUse/Bash (destructive-command guard + enforce-commands), PostToolUse (edit validation), Stop (session events), SubagentStop (telemetry), PostToolUseFailure (corrective context), PostToolBatch (wave signal), SubagentStart (telemetry), CwdChanged (cwd-change record). Plus the Clank Event Bus integration in `hooks/_lib/events.mjs`.
 - **Output Styles**: 3 (session-report, wave-summary, finding-report) for consistent reporting
 
@@ -0,0 +1,116 @@
+# /test --target aiat-pmo-module — Value-Proof Report
+
+> **Issue:** [#385](https://gitlab.gotzendorfer.at/infrastructure/session-orchestrator/-/issues/385) — end2end-proof: /test --target aiat-pmo-module (web-gate)
+> **Session:** main-2026-05-14-deep-3 W1 (coord-direct)
+> **Status:** Mechanism + live-execution proven. Coverage-proof partial — rubric-v1 artifact gap surfaced for follow-up.
+
+## Run Metadata
+
+| Field | Value |
+|---|---|
+| Target | `/Users/bernhardg./Projects/intern/aiat-pmo-module/tests/e2e` |
+| Profile | `web-gate` (`.orchestrator/policy/test-profiles.json`) |
+| Driver | `scripts/lib/playwright-driver/runner.mjs` (Playwright 1.x via global npx) |
+| Run-ID | `aiat-pmo-2026-05-14-170021-v3` |
+| Run-Dir | `.orchestrator/metrics/test-runs/aiat-pmo-2026-05-14-170021-v3/` |
+| Started | 2026-05-14T15:00:22.316Z |
+| Duration | 547 ms (test execution); ~30 s wall-clock incl. spawn + reporter |
+| Exit code | 1 (Playwright: ≥1 unexpected failure) — runner.mjs maps to exit 1 per spec |
+| Orchestrator session | `main-2026-05-14-deep-3` |
+| Plugin version | v3.5.0 |
+
+## Stack Setup
+
+Pre-existing (3 h uptime at session start):
+
+```bash
+docker compose -f ~/Projects/intern/aiat-pmo-module/dev/docker-compose.yml ps
+# aiat-pmo-daemon, aiat-pmo-ws, aiat-pmo-espo, aiat-pmo-db (healthy)
+```
+
+Bootstrap (coord-direct W1, ~30 s):
+
+```bash
+cd ~/Projects/intern/aiat-pmo-module/tests/e2e && npm install   # 4 packages
+npx playwright install chromium                                  # 92.4 MiB → ~/Library/Caches/ms-playwright/chromium_headless_shell-1223
+```
+
+Health: `curl http://localhost:8090` → 200 OK (EspoCRM responding).
+
+Env: `tests/e2e/.env` not used; tests read `process.env.ESPOCRM_URL` (defaults to `http://localhost:8090`). No `TEST_INITIATIVE_ID` env var set → most tests conditionally skipped.
+
+## Test Execution Summary
+
+| Metric | Value |
+|---|---|
+| Expected (passed) | 0 |
+| Unexpected (failed) | 1 |
+| Flaky | 0 |
+| Skipped | 31 |
+| Total declared | 32 |
+
+The single failure is `initiative-list.spec.ts:27 — GET /api/v1/Initiative returns 200 with total and list`. The test asserts `expect(response.status()).toBe(200)` but the server returns 401 because no API key / session token was provided in the test environment. The 31 skipped tests all carry conditional `test.skip(!ENV_VAR, '...')` guards; this one is missing that guard, so it executes and fails immediately. **This is a minor finding in `aiat-pmo-module` (missing skip-guard) — not a /test bug.**
+
+Skip distribution (31 across 14 spec files):
+
+| File | Skipped |
+|---|---|
+| `api/restricted-role-403.spec.ts` | 6 |
+| `api/acl-team-isolation.spec.ts` | 3 |
+| `api/auth-token.spec.ts` | 3 |
+| `api/cluster-routing.spec.ts` | 3 |
+| `api/create-via-api-key.spec.ts` | 3 |
+| `api/stale-filter.spec.ts` | 3 |
+| `initiative-auth.spec.ts` | 2 |
+| `api/score-live.spec.ts` | 2 |
+| Remaining 6 spec files | 1 each |
+
+## Artifacts Captured
+
+```
+.orchestrator/metrics/test-runs/aiat-pmo-2026-05-14-170021-v3/
+├── console.log         17 279 B   combined stdout+stderr from npx
+├── exit_code            1 B       Playwright exit code (1)
+├── report/index.html             Playwright HTML reporter output
+├── results.json        27 154 B   Playwright JSON reporter
+└── test-results/                  per-test artifacts
+    ├── .last-run.json
+    └── <test-name-chromium>/      32 sub-dirs
+        └── trace.zip              Playwright trace (`--trace on`)
+```
+
+## ux-evaluator Status — Coverage Gap
+
+ux-evaluator agent **not dispatched** this run. Rationale: rubric-v1 specifies 4 checks; three of them require artifact shapes the current playwright-driver runner does not produce, and the fourth is macOS-only and does not apply to a web target:
+
+| rubric-v1 check | Required artifact | Produced this run? |
+|---|---|---|
+| 1. onboarding-step-count ≤ 7 | AX-tree snapshots (`ax-snapshots/*.yaml` or similar) | ❌ no — peekaboo-style concept, not implemented for web in v1 |
+| 2. axe-violations critical/serious | `axe-*.json` from @axe-core/playwright | ❌ no — soft-skipped (axe-core not in tests/e2e deps) |
+| 3. console-errors visible-to-user | `console.ndjson` structured | ❌ no — only flat `console.log` (combined stdout) |
+| 4. Apple-Liquid-Glass conformance | macOS-only (peekaboo) | n/a — web target |
+
+The agent would have nothing actionable to classify. Two follow-up issues were filed to close this gap (see Findings & Follow-ups below).
+
+## Findings & Follow-ups (filed this session)
+
+| # | Severity | Description | Disposition |
+|---|---|---|---|
+| RUNNER-1 | MED | `runner.mjs:174-180` used Jest/Vitest `--reporter html:<path>,json:<path>` syntax; Playwright canonical is `--reporter=html,json` + `PLAYWRIGHT_HTML_OUTPUT_DIR` / `PLAYWRIGHT_JSON_OUTPUT_FILE` / `PLAYWRIGHT_HTML_OPEN` env vars. | **Fixed inline this session** (coord-direct W1 hotfix; deviation logged in STATE.md). Filed retro issue for the regression-test gap (mechanism-proof dry-run didn't catch this — only live spawn does). |
+| RUNNER-2 | MED | `runner.mjs` does not write the artifacts rubric-v1 expects: no `ax-snapshots/`, no `console.ndjson`, no `screenshots/` namespace — only Playwright-native artifacts. It also skips axe-core whenever `@axe-core/playwright` isn't in the target's package.json. | **New issue filed** — V2 capture-extension to bridge runner.mjs ↔ rubric-v1. Until then, /test on web targets is mechanism-proven but coverage-proof partial. |
+| TARGET-RESOLUTION | LOW | Runner uses `--target <repo-root>` but tests/e2e is a nested package (own playwright.config.ts + node_modules). First retry failed with "two different versions of @playwright/test" because npx fell back to global. Resolved by passing `--target tests/e2e` directly. Profile registry should grow a `tests-dir` field or runner.mjs should walk for the closest `playwright.config.*`. | **Documented here**; deferred to a future profile-schema enhancement. |
+| AIAT-PMO-INIT-LIST | LOW | `aiat-pmo-module tests/e2e/tests/initiative-list.spec.ts:27` lacks the `test.skip(!AUTH_ENV, …)` guard the other 13 spec files use; fails 401 in any env without auth. | **Cross-repo finding** — not filed here. Will surface to aiat-pmo-module backlog. |
+
+## Re-Run Dedupe Verification
+
+Not exercised this session. The first live run (`aiat-pmo-2026-05-14-165941-retry`, target=repo-root) errored at the spawn level before reporter output. The second run (`aiat-pmo-2026-05-14-170021-v3`, target=tests/e2e) is the first artifact-producing run. A re-run dedupe pass requires reconcile triage, which is gated on the ux-evaluator artifact-shape fix (RUNNER-2 above).
+
+## Conclusion
+
+The /test command's end-to-end pipeline is **mechanically proven** against a real live target: bootstrap → driver spawn → Playwright execution → HTML/JSON reporter → exit-code mapping all work as specified. The reporter-syntax bug (RUNNER-1) blocked the value-proof at first attempt; it was fixed inline using canonical Playwright documentation (https://playwright.dev/docs/test-reporters) sourced via ref-mcp + WebFetch, then re-verified in the same session.
+
+The **coverage-proof is partial**: the runner's artifact shape does not yet match rubric-v1's expectations (RUNNER-2), so the ux-evaluator agent cannot perform its 4-check classification. This is a V2-substrate gap, not a mechanism failure. /test on web targets is usable today for "did Playwright tests pass" answers; value-proof for the agentic UX-rubric flow needs RUNNER-2.
+
+A real finding in the target repo (1 missing skip-guard) demonstrates that the pipeline produces actionable, repo-relevant signal even in this stub state.
+
+**Recommendation:** Close #385 with status "mechanism + minimal-coverage proof PARTIAL". File RUNNER-2 as the gating issue for full rubric-v1 coverage on the next /test --target aiat-pmo-module pass.
@@ -6,6 +6,9 @@
  * as part of the /test epic (#378) Track B wiring.
  */
 
+import path from 'node:path';
+import { isPathInside } from '../path-utils.mjs';
+
 /**
  * Parse the top-level `test:` YAML block from markdown content.
  * Returns defaults when the block is absent.
@@ -71,7 +74,15 @@ export function _parseTest(content) {
         if (v) tcDefaultProfile = v;
         break;
       case 'profiles-path':
-        if (v) tcProfilesPath = v;
+        if (v) {
+          // SEC-IR-LOW-2: reject path-traversal in profiles-path (CWE-23)
+          const projectRoot = process.cwd();
+          const resolved = path.resolve(projectRoot, v);
+          if (isPathInside(resolved, projectRoot)) {
+            tcProfilesPath = v;
+          }
+          // Silent skip on traversal — matches the lenient pattern used by other case branches
+        }
         break;
       case 'mode':
         if (validModes.has(v.toLowerCase())) tcMode = v.toLowerCase();
 
@@ -29,7 +29,7 @@ import realFs from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
 import { parseArgs } from 'node:util';
-import { getProfile, loadProfiles, validateProfile } from '../test-runner/profile-registry.mjs';
+import { getProfile, loadProfiles, validateProfile } from '../shared/profiles/registry.mjs';
 
 // ---------------------------------------------------------------------------
 // Path resolution helper
@@ -117,7 +117,10 @@ export default async function run(opts = {}) {
     process.exit(2);
   }
 
-  // SEC-PD-MED-2: reject runDir values that would break the --reporter "html:...,json:..." comma+colon split
+  // SEC-PD-MED-2: reject runDir values containing commas or colons as defensive
+  // path-sanitization (original rationale: reporter comma+colon split is now obsolete
+  // after the 2026-05-14 deep-3 reporter fix — paths flow via env vars, not CLI string).
+  // Kept as belt-and-braces against pathological run-dirs; revisit alongside #390 path-traversal.
   if (/[,:]/.test(runDir)) {
     console.error('runner: --run-dir must not contain commas or colons');
     process.exit(2);
@@ -171,11 +174,15 @@ export default async function run(opts = {}) {
   // Build Playwright args
   // -------------------------------------------------------------------------
 
+  // Playwright reporter syntax: comma-separated reporter NAMES, paths via env vars.
+  // Canonical: https://playwright.dev/docs/test-reporters#html-reporter (PLAYWRIGHT_HTML_OUTPUT_DIR)
+  // and #json-reporter (PLAYWRIGHT_JSON_OUTPUT_FILE). The previous `html:<path>` form
+  // was Jest/Vitest syntax — Playwright rejected it as `Cannot find module 'html:<path>'`.
   const playwrightArgs = [
     'playwright',
     'test',
     '--output', path.join(runDir, 'test-results'),
-    '--reporter', `html:${path.join(runDir, 'report')},json:${path.join(runDir, 'results.json')}`,
+    '--reporter', 'html,json',
     '--trace', 'on',
   ];
 
@@ -212,6 +219,12 @@ export default async function run(opts = {}) {
     cwd: targetPath,
     signal: controller.signal,
     stdio: ['ignore', 'pipe', 'pipe'],
+    env: {
+      ...process.env,
+      PLAYWRIGHT_HTML_OUTPUT_DIR: path.join(runDir, 'report'),
+      PLAYWRIGHT_JSON_OUTPUT_FILE: path.join(runDir, 'results.json'),
+      PLAYWRIGHT_HTML_OPEN: 'never',
+    },
   });
 
   proc.stdout.pipe(logStream, { end: false });
 
@@ -1,5 +1,5 @@
 /**
- * test-runner/profile-registry.mjs — Pure loader for test profile registry.
+ * shared/profiles/registry.mjs — Pure loader for test profile registry.
  *
  * Reads `.orchestrator/policy/test-profiles.json`, validates each entry
  * against profileRegistrySchema, and exposes pure helper accessors.
@@ -16,7 +16,7 @@
  */
 
 import fsPromises from 'node:fs/promises';
-import { profileEntrySchema, profileRegistrySchema } from './profile-schema.mjs';
+import { profileEntrySchema, profileRegistrySchema } from './schema.mjs';
 
 // ---------------------------------------------------------------------------
 // Default path
 
@@ -1,5 +1,5 @@
 /**
- * test-runner/profile-schema.mjs — Validation schemas for test profile entries.
+ * shared/profiles/schema.mjs — Validation schemas for test profile entries.
  *
  * Zod was not available in this project's node_modules at implementation time
  * (issue #383 part 3), so validation is implemented as a hand-rolled validator
@@ -13,6 +13,8 @@
  * `{ success: true, data }` or `{ success: false, error: ZodLike }`.
  */
 
+import { isPathInside } from '../../path-utils.mjs';
+
 // ---------------------------------------------------------------------------
 // Validation helpers
 // ---------------------------------------------------------------------------
@@ -92,6 +94,11 @@ function parseProfileEntry(value) {
   if (typeof rubric !== 'string') {
     return { success: false, error: makeError('rubric must be a string') };
   }
+  // SEC-IR-LOW-3: rubric must stay within project root
+  const projectRoot = process.cwd();
+  if (!isPathInside(rubric, projectRoot)) {
+    return { success: false, error: makeError('rubric path escapes project root') };
+  }
 
   // checks (optional array of strings)
   if (v.checks !== undefined) {
 
@@ -77,6 +77,7 @@ const DOCUMENTATION_MARKERS = [
 const SCAN_ROOTS = [
   'skills/peekaboo-driver',
   'scripts/lib/test-runner',
+  'scripts/lib/shared/profiles',
 ];
 
 const SCAN_EXTENSIONS = ['.md', '.mjs', '.js', '.ts'];