Skip to content

Commit 4122cf6

Browse files
committed
feat: remote environment support
1 parent de8288f commit 4122cf6

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

51 files changed

+1758
-1218
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import { getBuiltInRatings } from 'web-codegen-scorer';
2+
3+
/** @type {import("web-codegen-scorer").EnvironmentConfig} */
4+
export default {
5+
displayName: 'Remote Env (example)',
6+
clientSideFramework: 'angular',
7+
sourceDirectory: './project',
8+
ratings: getBuiltInRatings(),
9+
generationSystemPrompt: './system-instructions.md',
10+
executablePrompts: ['../../prompts/**/*.md'],
11+
packageManager: 'npm',
12+
};

report-app/src/app/pages/report-viewer/report-viewer.html

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -257,22 +257,22 @@ <h2>Generated applications</h2>
257257
{{ result.promptDef.name }}
258258

259259
<div class="status-badge-group">
260-
@let initialBuild = result.attemptDetails[0].buildResult;
261-
@let repairBuild =
260+
@let initialAttempt = result.attemptDetails[0];
261+
@let repairAttempt =
262262
result.attemptDetails.length > 1
263-
? result.attemptDetails[1].buildResult
263+
? result.attemptDetails[1]
264264
: null;
265-
@let finalBuild = repairBuild ?? initialBuild;
265+
@let finalAttempt = result.attemptDetails.at(-1)!;
266266

267-
@if (finalBuild.runtimeErrors) {
267+
@if (finalAttempt.serveTestingResult?.runtimeErrors) {
268268
<span class="status-badge error">Runtime error</span>
269269
}
270270

271-
@if (repairBuild?.status === 'error') {
271+
@if (repairAttempt?.buildResult?.status === 'error') {
272272
<span class="status-badge error">Build after repair</span>
273273
}
274274

275-
@if (initialBuild.status === 'error') {
275+
@if (initialAttempt?.buildResult?.status === 'error') {
276276
<span class="status-badge error">Initial build failed</span>
277277
}
278278
</div>
@@ -354,7 +354,8 @@ <h5>
354354
<h4>Additional info</h4>
355355
@for (attempt of result.attemptDetails; track attempt) {
356356
@let isBuilt = attempt.buildResult.status === 'success';
357-
@let axeViolations = attempt.buildResult.axeViolations;
357+
@let axeViolations =
358+
attempt.serveTestingResult?.axeViolations;
358359
@let hasAxeViolations =
359360
axeViolations && axeViolations.length > 0;
360361

@@ -501,12 +502,12 @@ <h5>Response</h5>
501502
}
502503
</div>
503504

504-
@if (finalBuild.runtimeErrors) {
505+
@let finalRuntimeErrors =
506+
finalAttempt.serveTestingResult?.runtimeErrors;
507+
@if (finalRuntimeErrors) {
505508
<div class="app-details-section">
506509
<h4>Runtime errors</h4>
507-
<pre class="callout warn code">{{
508-
finalBuild.runtimeErrors
509-
}}</pre>
510+
<pre class="callout warn code">{{ finalRuntimeErrors }}</pre>
510511
</div>
511512
}
512513

report-app/src/app/pages/report-viewer/report-viewer.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import {
1212
viewChild,
1313
} from '@angular/core';
1414
import { NgxJsonViewerModule } from 'ngx-json-viewer';
15-
import { BuildErrorType } from '../../../../../runner/builder/builder-types';
15+
import { BuildErrorType } from '../../../../../runner/workers/builder/builder-types';
1616
import {
1717
AssessmentResult,
1818
IndividualAssessment,
@@ -237,7 +237,7 @@ export class ReportViewer {
237237
});
238238

239239
protected getScreenshotUrl(result: AssessmentResult): string | null {
240-
return result.build.screenshotPngUrl ?? null;
240+
return result.finalAttempt.serveTestingResult?.screenshotPngUrl ?? null;
241241
}
242242

243243
protected isLoading = this.reportsFetcher.isLoadingSingleReport;

report-app/src/app/shared/debugging-zip.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { BuildResultStatus } from '../../../../runner/builder/builder-types';
1+
import { BuildResultStatus } from '../../../../runner/workers/builder/builder-types';
22
import {
33
AssessmentResult,
44
RunInfo,
@@ -31,11 +31,11 @@ export async function createPromptDebuggingZip(
3131
zip.file('generated-files.md', generatedFiles);
3232

3333
let errors = ``;
34-
if (app.build.runtimeErrors) {
35-
errors += `## Runtime errors\n${app.build.runtimeErrors}\n`;
34+
if (app.finalAttempt.serveTestingResult?.runtimeErrors) {
35+
errors += `## Runtime errors\n${app.finalAttempt.serveTestingResult?.runtimeErrors}\n`;
3636
}
37-
if (app.build.status === BuildResultStatus.ERROR) {
38-
errors += `## Build error\n ${app.build.message}`;
37+
if (app.finalAttempt.buildResult.status === BuildResultStatus.ERROR) {
38+
errors += `## Build error\n ${app.finalAttempt.buildResult.message}`;
3939
}
4040

4141
zip.file('errors.md', errors);

runner/builder/builder-types.ts

Lines changed: 0 additions & 96 deletions
This file was deleted.

runner/codegen/gemini-cli/gemini-cli-runner.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import {
1818
import { DirectorySnapshot } from './directory-snapshot.js';
1919
import { LlmResponseFile } from '../../shared-interfaces.js';
2020
import { UserFacingError } from '../../utils/errors.js';
21+
import assert from 'assert';
2122

2223
const SUPPORTED_MODELS = [
2324
'gemini-2.5-pro',
@@ -45,6 +46,19 @@ export class GeminiCliRunner implements LlmRunner {
4546
options: LlmGenerateFilesRequestOptions
4647
): Promise<LlmGenerateFilesResponse> {
4748
const { context, model } = options;
49+
50+
// TODO: Consider removing these assertions when we have better types here.
51+
// These fields are always set when running in a local environment, and this
52+
// is a requirement for selecting the `gemini-cli` runner.
53+
assert(
54+
context.buildCommand,
55+
'Expected a `buildCommand` to be set in the LLM generate request context'
56+
);
57+
assert(
58+
context.packageManager,
59+
'Expected a `packageManager` to be set in the LLM generate request context'
60+
);
61+
4862
const ignoreFilePath = join(context.directory, '.geminiignore');
4963
const instructionFilePath = join(context.directory, 'GEMINI.md');
5064
const settingsDir = join(context.directory, '.gemini');

runner/codegen/llm-runner.ts

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,24 @@ export interface LlmGenerateFilesContext {
9898
/**
9999
* Combined system instructions and prompt for the environments
100100
* where the two can't be provided separately.
101+
*
102+
* TODO(crisbeto): Can we explain the reason for this better?
101103
*/
102104
combinedPrompt: string;
103105
/** Directory in which the generation will occur. */
104106
directory: string;
105-
/** Command that the LLM can use to verify that the build works. */
106-
buildCommand: string;
107-
/** Package manager that the LLM can use. */
108-
packageManager: string;
107+
/**
108+
* Command that the LLM can use to verify that the build works.
109+
*
110+
* Can be `undefined` for remote environments.
111+
*/
112+
buildCommand: string | undefined;
113+
/**
114+
* Package manager that the LLM can use.
115+
*
116+
* Can be `undefined` for remote environments.
117+
*/
118+
packageManager: string | undefined;
109119
/** All available package managers supported by the runner. */
110120
possiblePackageManagers: string[];
111121
}
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import z from 'zod';
2+
import { ratingSchema } from '../ratings/rating-types.js';
3+
import { MultiStepPrompt } from './multi-step-prompt.js';
4+
import { mcpServerOptionsSchema } from '../codegen/llm-runner.js';
5+
import { getPossiblePackageManagers } from './environment-config.js';
6+
7+
export const baseEnvironmentConfigSchema = z.strictObject({
8+
/** Display name for the environment. */
9+
displayName: z.string(),
10+
/**
11+
* Optional unique ID for the environment.
12+
* If one isn't provided, it will be computed from the `displayName`.
13+
*/
14+
id: z.string().optional(),
15+
/** ID of the client-side framework used within the environment. */
16+
clientSideFramework: z.string(),
17+
/** Ratings to run when evaluating the environment. */
18+
ratings: z.array(ratingSchema),
19+
/** Path to the prompt used by the LLM for generating files. */
20+
generationSystemPrompt: z.string(),
21+
/**
22+
* Path to the prompt used by the LLM for repairing builds or failures.
23+
*
24+
* If unset or `null`, the eval tool will use its default repair instructions.
25+
*/
26+
repairSystemPrompt: z.union([z.string(), z.null()]).optional(),
27+
/**
28+
* Path to the prompt used by the LLM for editing.
29+
*
30+
* Prompts that run after the initial generation are treated as edits (e.g. multi-step prompts).
31+
* If `null`, the eval tool will use the generation prompt for edits.
32+
*/
33+
editingSystemPrompt: z.union([z.string(), z.null()]).optional(),
34+
/** Prompts that should be sent to the LLM and written into the output. */
35+
executablePrompts: z.array(
36+
z.union([
37+
z.string(),
38+
z.strictObject({
39+
path: z.string(),
40+
name: z.string().optional(),
41+
ratings: z.array(ratingSchema).optional(),
42+
}),
43+
z.custom<MultiStepPrompt>((data) => data instanceof MultiStepPrompt),
44+
])
45+
),
46+
/** MCP servers that can be started for this environment. */
47+
mcpServers: z.array(mcpServerOptionsSchema).optional(),
48+
/** Relative path to the environment's source code in which to generate new code. */
49+
sourceDirectory: z.string().optional(),
50+
/**
51+
* Path to the template directory to use when creating
52+
* the project which the LLM will run against.
53+
*/
54+
projectTemplate: z.string().optional(),
55+
/** Package manager to use for the eval. */
56+
packageManager: z.enum(getPossiblePackageManagers()).optional(),
57+
/**
58+
* Command to run when building the generated code.
59+
* Defaults to `<package manager> run build`.
60+
*/
61+
buildCommand: z.string().optional(),
62+
/**
63+
* Command to run when starting a development server inside the app.
64+
* Defaults to `<package manager> run start --port 0`.
65+
*/
66+
serveCommand: z.string().optional(),
67+
/**
68+
* Whether to skip installing dependencies when running evals in the environment.
69+
* Useful if you're managing dependencies yourself.
70+
*/
71+
skipInstall: z.boolean().optional(),
72+
/**
73+
* ID of the fullstack framework used within the environment.
74+
* If omitted, it will default to the `clientSideFramework`.
75+
*/
76+
fullStackFramework: z.string().optional(),
77+
/** Path to the prompt to use when rating code. */
78+
codeRatingPrompt: z.string().optional(),
79+
/** When enabled, the system prompts for this environment won't be included in the report. */
80+
classifyPrompts: z.boolean().optional(),
81+
});

0 commit comments

Comments
 (0)