angular
diff --git a/‎docs/environment-reference.md‎
Lines changed: 5 additions & 0 deletions b/‎docs/environment-reference.md‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎report-app/src/app/pages/report-viewer/report-viewer.html‎
Lines changed: 50 additions & 0 deletions b/‎report-app/src/app/pages/report-viewer/report-viewer.html‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎report-app/src/app/pages/report-viewer/report-viewer.ts‎
Lines changed: 26 additions & 0 deletions b/‎report-app/src/app/pages/report-viewer/report-viewer.ts‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎runner/builder/builder-types.ts‎
Lines changed: 99 additions & 0 deletions b/‎runner/builder/builder-types.ts‎
Lines changed: 99 additions & 0 deletions
diff --git a/‎runner/configuration/constants.ts‎
Lines changed: 6 additions & 0 deletions b/‎runner/configuration/constants.ts‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎runner/configuration/environment-local.ts‎
Lines changed: 6 additions & 0 deletions b/‎runner/configuration/environment-local.ts‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎runner/orchestration/build-repair.ts‎
Lines changed: 0 additions & 1 deletion b/‎runner/orchestration/build-repair.ts‎
Lines changed: 0 additions & 1 deletion
diff --git a/‎runner/orchestration/gateway.ts‎
Lines changed: 21 additions & 0 deletions b/‎runner/orchestration/gateway.ts‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎runner/orchestration/gateways/local_gateway.ts‎
Lines changed: 59 additions & 0 deletions b/‎runner/orchestration/gateways/local_gateway.ts‎
Lines changed: 59 additions & 0 deletions
@@ -179,3 +179,8 @@ Defaults to `<package manager> run build`.
 
 Command used to start a local dev server as a part of the evaluation.
 Defaults to `<package manager> run start --port 0`.
+
+### `testCommand`
+
+Command used to run tests against the generated code. If this property is not provided, tests will not be run. The command should exit with code 0 on success and a non-zero exit code on failure. The output from the command (both `stdout` and `stderr`) is captured and used for repair attempts if the tests fail. The test command will time out after 2 minutes.
+
@@ -76,6 +76,20 @@ <h3 class="chart-title">
             />
           </div>
         </div>
+        @if (overview.stats.tests) {
+          <div class="chart-container test-results-details">
+            <h3 class="chart-title">
+              <span class="material-symbols-outlined"> quiz </span>
+              <span>Tests</span>
+            </h3>
+            <div class="summary-card-item">
+              <stacked-bar-chart
+                [data]="testsAsGraphData(overview.stats.tests)"
+                [compact]="true"
+              />
+            </div>
+          </div>
+        }
         @if (overview.stats.runtime) {
           <div class="chart-container">
             <h3 class="chart-title">
@@ -275,6 +289,19 @@ <h2>Generated applications</h2>
                 @if (initialAttempt?.buildResult?.status === 'error') {
                   <span class="status-badge error">Initial build failed</span>
                 }
+
+                <!-- Test status badges -->
+                @if (result.testResult) {
+                  @if (result.testResult.passed) {
+                    @if ((result.testRepairAttempts || 0) > 0) {
+                      <span class="status-badge warning">Tests passed after repair</span>
+                    } @else {
+                      <span class="status-badge success">Tests passed</span>
+                    }
+                  } @else {
+                    <span class="status-badge error">Tests failed</span>
+                  }
+                }
               </div>
             </div>
           </expansion-panel-header>
@@ -350,6 +377,29 @@ <h5>
                 </div>
               </div>
 
+              @if (result.testResult) {
+                <div class="app-details-section">
+                  <h4>Test Results</h4>
+                  <div class="test-summary">
+                    @if (result.testResult.passed) {
+                      <span class="status-text success">✔ Tests passed</span>
+                      @if ((result.testRepairAttempts || 0) > 0) {
+                        <span class="status-text">after {{ result.testRepairAttempts }} repair attempt(s)</span>
+                      }
+                    } @else {
+                      <span class="status-text error">✘ Tests failed</span>
+                    }
+                  </div>
+                  
+                  @if (result.testResult.output) {
+                    <details class="test-output-button">
+                      <summary class="neutral-button">See Test Output</summary>
+                      <pre class="callout neutral code">{{ result.testResult.output }}</pre>
+                    </details>
+                  }
+                </div>
+              }
+
               <div class="app-details-section">
                 <h4>Additional info</h4>
                 @for (attempt of result.attemptDetails; track attempt) {
 
@@ -20,6 +20,7 @@ import {
   LlmResponseFile,
   RunInfo,
   RunSummaryBuilds,
+  RunSummaryTests,
   RuntimeStats,
   ScoreBucket,
   SkippedIndividualAssessment,
@@ -281,6 +282,31 @@ export class ReportViewer {
     ];
   }
 
+  /**
+   * Turns the test summary of a run into the segments of a stacked bar chart.
+   * Segments are emitted in a fixed order: passed, passed after repair,
+   * failed, and no tests run.
+   */
+  protected testsAsGraphData(tests: RunSummaryTests): StackedBarChartData {
+    const {
+      successfulInitialTests,
+      successfulTestsAfterRepair,
+      failedTests,
+      noTestsRun,
+    } = tests;
+
+    return [
+      { label: 'Passed', color: ScoreCssVariable.excellent, value: successfulInitialTests },
+      { label: 'Passed after repair', color: ScoreCssVariable.great, value: successfulTestsAfterRepair },
+      { label: 'Failed', color: ScoreCssVariable.poor, value: failedTests },
+      { label: 'No tests run', color: ScoreCssVariable.neutral, value: noTestsRun },
+    ];
+  }
+
   protected checksAsGraphData(buckets: ScoreBucket[]): StackedBarChartData {
     return buckets.map((b) => ({
       label: b.nameWithLabels,
 
@@ -0,0 +1,99 @@
+import { ProgressType } from '../progress/progress-logger.js';
+import { PackageSummary } from '@safety-web/types';
+import {
+  AgentOutput,
+  BrowserAgentTaskInput,
+} from '../testing/browser-agent/models.js';
+import { Result } from 'axe-core';
+import { CspViolation } from './auto-csp-types.js';
+
+/**
+ * Represents the message structure used for communication between
+ * the main process and the build worker process.
+ */
+export interface BuildWorkerMessage {
+  /** Directory of the app (presumably where the commands below are run — confirm against the worker). */
+  directory: string;
+  /** Name of the app. */
+  appName: string;
+  /** Command used to build the app. */
+  buildCommand: string;
+  /** Command used to start a development server. */
+  serveCommand: string;
+  /** Command used to run tests for the app. */
+  testCommand?: string;
+  /**
+   * Whether this application should be invoked via Puppeteer and
+   * runtime errors should be collected and reported.
+   */
+  collectRuntimeErrors?: boolean;
+  /**
+   * Whether to take a screenshot of the application after a successful build.
+   */
+  takeScreenshots?: boolean;
+  /**
+   * Whether or not to perform Axe testing of the application after a successful build.
+   */
+  includeAxeTesting?: boolean;
+
+  /** Whether to enable the auto CSP checks. */
+  enableAutoCsp?: boolean;
+
+  /** User journey browser agent task input */
+  userJourneyAgentTaskInput?: BrowserAgentTaskInput;
+}
+
+/** Status values that a `BuildResult` can carry. */
+export enum BuildResultStatus {
+  SUCCESS = 'success',
+  ERROR = 'error',
+}
+
+/**
+ * Categories that a build error can be classified into. The trailing comment
+ * on each member shows the output pattern associated with that category.
+ */
+export enum BuildErrorType {
+  MISSING_DEPENDENCY = 'Missing Dependency', // "[ERROR] Could not resolve"
+  TYPESCRIPT_ERROR = 'TypeScript Error', // "[ERROR] TS\d+"
+  ANGULAR_DIAGNOSTIC = 'Angular Diagnostic', // "[ERROR] NG\d+"
+  OTHER = 'Other',
+}
+
+/** Outcome of a build; carried as the `payload` of a `BuildResultMessage`. */
+export interface BuildResult {
+  /** Status of the build (`success` or `error`). */
+  status: BuildResultStatus;
+  message: string;
+  /** Category of the build error (presumably only set when the build failed — confirm). */
+  errorType?: BuildErrorType;
+  screenshotPngUrl?: string;
+  missingDependency?: string;
+  runtimeErrors?: string;
+  /** JSON report from the Safety Web runner, if available. */
+  safetyWebReportJson?: PackageSummary[];
+  /** Output of the user journey browser agent, or `null` (presumably when the agent was not run — confirm). */
+  userJourneyAgentOutput: AgentOutput | null;
+  cspViolations?: CspViolation[];
+  /** Violations reported by axe-core (typed as axe-core `Result` entries). */
+  axeViolations?: Result[];
+}
+
+/** Message that carries a `BuildResult`; identified by `type: 'build'`. */
+export interface BuildResultMessage {
+  type: 'build';
+  payload: BuildResult;
+}
+
+/** Message that carries a progress log entry; identified by `type: 'log'`. */
+export interface BuildProgressLogMessage {
+  type: 'log';
+  payload: {
+    state: ProgressType;
+    message: string;
+    details?: string;
+  };
+}
+
+/**
+ * Signature of a progress logging callback. Its parameters mirror the fields
+ * of the `BuildProgressLogMessage` payload.
+ */
+export type BuilderProgressLogFn = (
+  state: ProgressType,
+  message: string,
+  details?: string
+) => void;
+
+/**
+ * Union of the result and progress log messages; the `type` field tells them apart.
+ * NOTE(review): presumably these are the messages sent by the build worker — confirm.
+ */
+export type BuildWorkerResponseMessage =
+  | BuildResultMessage
+  | BuildProgressLogMessage;
+
+/** Kinds of repair: build, Axe and test. */
+export enum RepairType {
+  Build = 'Build',
+  Axe = 'Axe',
+  Test = 'Test',
+}
@@ -26,6 +26,12 @@ export const LLM_OUTPUT_DIR = join(rootDir, 'llm-output');
  */
 export const DEFAULT_MAX_REPAIR_ATTEMPTS = 1;
 
+/**
+ * Number of times we'll ask the LLM to repair a test failure,
+ * providing the test output and the code that causes the problem.
+ */
+export const DEFAULT_MAX_TEST_REPAIR_ATTEMPTS = 1;
+
 /** Name of the folder where we store all generated reports */
 export const REPORTS_ROOT_DIR = join(rootDir, 'reports');
 
 
@@ -35,6 +35,10 @@ export const localEnvironmentConfigSchema = baseEnvironmentConfigSchema.extend({
    * Defaults to `<package manager> run start --port 0`.
    */
   serveCommand: z.string().optional(),
+  /**
+   * Command to run when testing the code. Optional: when omitted, tests are not run.
+   */
+  testCommand: z.string().optional(),
   /**
    * Whether to skip installing dependencies when running evals in the environment.
    * Useful if you're managing dependencies yourself.
@@ -56,6 +60,8 @@ export class LocalEnvironment extends BaseEnvironment {
   readonly buildCommand: string;
   /** Command to run when starting a development server inside the app. */
   readonly serveCommand: string;
+  /** Command to run when testing the app. Tests are skipped when it is not set. */
+  readonly testCommand?: string;
   /**
    * Absolute path at which files specific to this environment are located. Will be merged in
    * with the files from the `projectTemplatePath` to get the final project structure.
 
@@ -28,7 +28,6 @@ import { EvalID, Gateway } from './gateway.js';
  * @param abortSignal An AbortSignal to cancel the operation.
  * @param workerConcurrencyQueue The queue for managing worker concurrency.
  * @param attempts The current attempt number.
- * @param repairType The type of repair being performed.
  * @returns A promise that resolves to the new BuildResult.
  */
 export async function repairAndBuild(
 
@@ -7,6 +7,7 @@ import {
   LlmResponse,
   LlmResponseFile,
   RootPromptDefinition,
+  TestResult,
 } from '../shared-interfaces.js';
 import { BuildResult } from '../workers/builder/builder-types.js';
 
@@ -35,6 +36,16 @@ export interface Gateway<Env extends Environment> {
     abortSignal: AbortSignal
   ): Promise<LlmResponse>;
 
+  /**
+   * Requests a repair from the LLM after a test failure and resolves to its response.
+   * NOTE(review): `errorMessage` is presumably the captured test output and
+   * `appFiles` the current files of the app — confirm against the callers.
+   */
+  repairTest(
+    id: EvalID,
+    requestCtx: LlmGenerateFilesContext,
+    model: string,
+    errorMessage: string,
+    appFiles: LlmResponseFile[],
+    contextFiles: LlmContextFile[],
+    abortSignal: AbortSignal
+  ): Promise<LlmResponse>;
+
   shouldRetryFailedBuilds(evalID: EvalID): boolean;
 
   tryBuild(
@@ -47,6 +58,16 @@ export interface Gateway<Env extends Environment> {
     progress: ProgressLogger
   ): Promise<BuildResult>;
 
+  /**
+   * Runs the tests for the app in `appDirectoryPath` and resolves to their result.
+   * May resolve to `null`; the local gateway does so when the environment
+   * has no `testCommand` configured.
+   */
+  tryTest(
+    id: EvalID,
+    env: Env,
+    appDirectoryPath: string,
+    rootPromptDef: RootPromptDefinition,
+    workerConcurrencyQueue: PQueue,
+    abortSignal: AbortSignal,
+    progress: ProgressLogger
+  ): Promise<TestResult | null>;
+
   serveBuild<T>(
     id: EvalID,
     env: Env,
 
@@ -13,6 +13,7 @@ import {
   LlmContextFile,
   LlmResponse,
   LlmResponseFile,
+  TestResult,
 } from '../../shared-interfaces.js';
 import { generateCodeWithAI } from '../codegen.js';
 import { EvalID, Gateway } from '../gateway.js';
@@ -66,6 +67,24 @@ export class LocalGateway implements Gateway<LocalEnvironment> {
     );
   }
 
+  /**
+   * Asks the LLM for repaired code after a test failure by delegating to
+   * `generateCodeWithAI` with the request context and the context files.
+   *
+   * NOTE(review): `errorMessage` and `appFiles` are not forwarded by this
+   * implementation — presumably the caller has already embedded them in
+   * `requestCtx`; confirm.
+   */
+  async repairTest(
+    _id: EvalID,
+    requestCtx: LlmGenerateFilesContext,
+    model: string,
+    errorMessage: string,
+    appFiles: LlmResponseFile[],
+    contextFiles: LlmContextFile[],
+    abortSignal: AbortSignal
+  ): Promise<LlmResponse> {
+    const repaired = await generateCodeWithAI(this.llm, model, requestCtx, contextFiles, abortSignal);
+    return repaired;
+  }
+
   tryBuild(
     _id: EvalID,
     env: LocalEnvironment,
@@ -106,6 +125,46 @@ export class LocalGateway implements Gateway<LocalEnvironment> {
     );
   }
 
+  /**
+   * Runs the environment's test command for the generated app in a forked
+   * worker process, scheduled on the worker concurrency queue.
+   *
+   * @param _id Unused by the local gateway.
+   * @param env Local environment; its `testCommand` is sent to the worker.
+   * @param appDirectoryPath Directory of the app, sent to the worker as `directory`.
+   * @param rootPromptDef Prompt definition; its `name` is sent to the worker as `appName`.
+   * @param workerConcurrencyQueue Queue on which the worker run is scheduled.
+   * @param abortSignal Signal handed to `fork` so the worker can be aborted.
+   * @param progress Not used by this implementation.
+   * @returns `null` when the environment has no `testCommand`; otherwise the
+   *   `payload` of the first message sent by the worker. Rejects when the worker
+   *   emits an error or exits before sending any message.
+   */
+  tryTest(
+    _id: EvalID,
+    env: LocalEnvironment,
+    appDirectoryPath: string,
+    rootPromptDef: RootPromptDefinition,
+    workerConcurrencyQueue: PQueue,
+    abortSignal: AbortSignal,
+    progress: ProgressLogger
+  ): Promise<TestResult | null> {
+    if (!env.testCommand) {
+      return Promise.resolve(null);
+    }
+    const testParams = {
+      directory: appDirectoryPath,
+      appName: rootPromptDef.name,
+      testCommand: env.testCommand,
+    };
+
+    return workerConcurrencyQueue.add(
+      () =>
+        new Promise<TestResult>((resolve, reject) => {
+          const child: ChildProcess = fork(
+            path.resolve(import.meta.dirname, '../../workers/test/worker.js'),
+            { signal: abortSignal }
+          );
+          // Flipped synchronously by whichever handler fires first, so that the
+          // exit caused by our own kill below is not mistaken for a crash and
+          // the outcome does not depend on which async handler finishes first.
+          let settling = false;
+
+          child.on('message', async (message: unknown) => {
+            if (settling) {
+              return;
+            }
+            settling = true;
+            // The worker's message is trusted to have the `{ payload }` shape.
+            const result = message as { payload: TestResult };
+            await killChildProcessGracefully(child);
+            resolve(result.payload);
+          });
+          child.on('error', async (err) => {
+            if (settling) {
+              return;
+            }
+            settling = true;
+            await killChildProcessGracefully(child);
+            reject(err);
+          });
+          // Without this, a worker that crashes or exits before reporting a
+          // result would leave the promise pending and keep the queue slot busy.
+          // Assumes the worker stays alive until it is killed after reporting
+          // (the message handler above kills it) — TODO confirm against the worker.
+          child.on('exit', (code, signal) => {
+            if (settling) {
+              return;
+            }
+            settling = true;
+            reject(
+              new Error(
+                `Test worker exited before reporting a result (code: ${code}, signal: ${signal}).`
+              )
+            );
+          });
+
+          child.send(testParams);
+        }),
+      { throwOnTimeout: true }
+    );
+  }
+
   async serveBuild<T>(
     _id: EvalID,
     env: LocalEnvironment,