Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions cmd/browsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ func getAvailableViewports() []string {
"1440x900@25",
"1024x768@60",
"1200x800@60",
"1280x800@60",
}
}

Expand Down Expand Up @@ -2069,7 +2070,7 @@ func init() {
browsersUpdateCmd.Flags().String("profile-id", "", "Profile ID to load into the browser session (mutually exclusive with --profile-name)")
browsersUpdateCmd.Flags().String("profile-name", "", "Profile name to load into the browser session (mutually exclusive with --profile-id)")
browsersUpdateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends")
browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60")
browsersUpdateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60")

browsersCmd.AddCommand(browsersListCmd)
browsersCmd.AddCommand(browsersCreateCmd)
Expand Down Expand Up @@ -2304,7 +2305,7 @@ func init() {
browsersCreateCmd.Flags().Bool("save-changes", false, "If set, save changes back to the profile when the session ends")
browsersCreateCmd.Flags().String("proxy-id", "", "Proxy ID to use for the browser session")
browsersCreateCmd.Flags().StringSlice("extension", []string{}, "Extension IDs or names to load (repeatable; may be passed multiple times or comma-separated)")
browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60")
browsersCreateCmd.Flags().String("viewport", "", "Browser viewport size (e.g., 1920x1080@25). Supported: 2560x1440@10, 1920x1080@25, 1920x1200@25, 1440x900@25, 1024x768@60, 1200x800@60, 1280x800@60")
browsersCreateCmd.Flags().Bool("viewport-interactive", false, "Interactively select viewport size from list")
browsersCreateCmd.Flags().String("pool-id", "", "Browser pool ID to acquire from (mutually exclusive with --pool-name)")
browsersCreateCmd.Flags().String("pool-name", "", "Browser pool name to acquire from (mutually exclusive with --pool-id)")
Expand Down
4 changes: 2 additions & 2 deletions cmd/browsers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,6 @@ func TestBrowsersDelete_Failure(t *testing.T) {
assert.True(t, strings.Contains(errMsg, "right failed") || strings.Contains(errMsg, "left failed"), "expected error message to contain either 'right failed' or 'left failed', got: %s", errMsg)
}


func TestBrowsersView_ByID_PrintsURL(t *testing.T) {
// Capture both pterm output and raw stdout
setupStdoutCapture(t)
Expand Down Expand Up @@ -1147,12 +1146,13 @@ func TestParseViewport_InvalidFormats(t *testing.T) {

func TestGetAvailableViewports_ReturnsExpectedOptions(t *testing.T) {
viewports := getAvailableViewports()
assert.Len(t, viewports, 6)
assert.Len(t, viewports, 7)
assert.Contains(t, viewports, "2560x1440@10")
assert.Contains(t, viewports, "1920x1080@25")
assert.Contains(t, viewports, "1920x1200@25")
assert.Contains(t, viewports, "1440x900@25")
assert.Contains(t, viewports, "1200x800@60")
assert.Contains(t, viewports, "1280x800@60")
assert.Contains(t, viewports, "1024x768@60")
}

Expand Down
4 changes: 3 additions & 1 deletion pkg/templates/python/anthropic-computer-use/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ async def sampling_loop(
tool_version: ToolVersion = "computer_use_20250124",
thinking_budget: int | None = None,
token_efficient_tools_beta: bool = False,
viewport_width: int = 1280,
viewport_height: int = 800,
):
"""
Agentic sampling loop for the assistant/tool interaction of computer use.
Expand All @@ -99,7 +101,7 @@ async def sampling_loop(
tool_group = TOOL_GROUPS_BY_VERSION[tool_version]
tool_collection = ToolCollection(
*(
ToolCls(kernel=kernel, session_id=session_id) if ToolCls.__name__.startswith("ComputerTool") else ToolCls()
ToolCls(kernel=kernel, session_id=session_id, width=viewport_width, height=viewport_height) if ToolCls.__name__.startswith("ComputerTool") else ToolCls()
for ToolCls in tool_group.tools
)
)
Expand Down
8 changes: 5 additions & 3 deletions pkg/templates/python/anthropic-computer-use/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class KernelBrowserSession:
stealth: bool = True
timeout_seconds: int = 300

viewport_width: int = 1280
viewport_height: int = 800

# Replay recording options
record_replay: bool = False
replay_grace_period: float = 5.0 # Seconds to wait before stopping replay
Expand All @@ -52,9 +55,8 @@ async def __aenter__(self) -> "KernelBrowserSession":
stealth=self.stealth,
timeout_seconds=self.timeout_seconds,
viewport={
"width": 1024,
"height": 768,
"refresh_rate": 60,
"width": self.viewport_width,
"height": self.viewport_height,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Viewport refresh_rate removed without documentation or fallback

Medium Severity

The refresh_rate property was removed from the viewport configuration when creating browser sessions. Previously, Anthropic templates specified refresh_rate: 60 and Yutori templates specified refresh_rate: 25. The allowed viewports list requires specific refresh rates (e.g., 1280x800@60). If the API doesn't default to the correct refresh rate, browser creation could fail with viewport validation errors. This removal was not mentioned in the PR description.

Additional Locations (1)

Fix in Cursor Fix in Web

},
)

Expand Down
6 changes: 3 additions & 3 deletions pkg/templates/python/anthropic-computer-use/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ class BaseComputerTool:
"""

name: Literal["computer"] = "computer"
width: int = 1024
height: int = 768
display_num: int | None = None

# Kernel client and session
Expand All @@ -127,10 +125,12 @@ def options(self) -> ComputerToolOptions:
"display_number": self.display_num,
}

def __init__(self, kernel: Kernel | None = None, session_id: str | None = None):
def __init__(self, kernel: Kernel | None = None, session_id: str | None = None, width: int = 1280, height: int = 800):
super().__init__()
self.kernel = kernel
self.session_id = session_id
self.width = width
self.height = height

def validate_coordinates(self, coordinate: tuple[int, int] | list[int] | None = None) -> tuple[int, int] | None:
"""Validate that coordinates are non-negative integers and convert lists to tuples if needed."""
Expand Down
4 changes: 2 additions & 2 deletions pkg/templates/python/yutori-computer-use/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view

## Viewport Configuration

Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default.
Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy, and this template uses that viewport by default.

> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy.
> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions.

See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations.

Expand Down
2 changes: 1 addition & 1 deletion pkg/templates/python/yutori-computer-use/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ async def sampling_loop(
cdp_ws_url: Optional[str] = None,
max_tokens: int = 4096,
max_iterations: int = 50,
viewport_width: int = 1200,
viewport_width: int = 1280,
viewport_height: int = 800,
mode: BrowserMode = "computer_use",
) -> dict[str, Any]:
Expand Down
3 changes: 1 addition & 2 deletions pkg/templates/python/yutori-computer-use/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class KernelBrowserSession:
stealth: bool = True
timeout_seconds: int = 300

viewport_width: int = 1200
viewport_width: int = 1280
viewport_height: int = 800

# Replay recording options
Expand All @@ -56,7 +56,6 @@ async def __aenter__(self) -> "KernelBrowserSession":
viewport={
"width": self.viewport_width,
"height": self.viewport_height,
"refresh_rate": 25,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yutori templates not updated to 1280x800 as claimed

Medium Severity

The PR description states it "Updated Yutori computer-use templates (TypeScript & Python) to default to 1280x800 viewport instead of 1200x800", but the Yutori templates still default to 1200x800. The only change made to the Yutori Python template was removing refresh_rate: 25 - the viewport_width and viewport_height defaults remain at 1200x800 in both the Python and TypeScript Yutori templates (in session.py, loop.py, session.ts, and loop.ts). The Anthropic templates were correctly updated to 1280x800, but the Yutori templates were missed.

Fix in Cursor Fix in Web

},
)

Expand Down
2 changes: 1 addition & 1 deletion pkg/templates/python/yutori-computer-use/tools/computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ class N1Action(TypedDict, total=False):


class ComputerTool:
def __init__(self, kernel: Kernel, session_id: str, width: int = 1200, height: int = 800):
def __init__(self, kernel: Kernel, session_id: str, width: int = 1280, height: int = 800):
self.kernel = kernel
self.session_id = session_id
self.width = width
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@


class PlaywrightComputerTool:
def __init__(self, cdp_ws_url: str, width: int = 1200, height: int = 800):
def __init__(self, cdp_ws_url: str, width: int = 1280, height: int = 800):
self.cdp_ws_url = cdp_ws_url
self.width = width
self.height = height
Expand Down
6 changes: 5 additions & 1 deletion pkg/templates/typescript/anthropic-computer-use/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ export async function samplingLoop({
tokenEfficientToolsBeta = false,
kernel,
sessionId,
viewportWidth = 1280,
viewportHeight = 800,
}: {
model: string;
systemPromptSuffix?: string;
Expand All @@ -69,10 +71,12 @@ export async function samplingLoop({
tokenEfficientToolsBeta?: boolean;
kernel: Kernel;
sessionId: string;
viewportWidth?: number;
viewportHeight?: number;
}): Promise<BetaMessageParam[]> {
const selectedVersion = toolVersion || DEFAULT_TOOL_VERSION;
const toolGroup = TOOL_GROUPS_BY_VERSION[selectedVersion];
const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId)));
const toolCollection = new ToolCollection(...toolGroup.tools.map((Tool: typeof ComputerTool20241022 | typeof ComputerTool20250124) => new Tool(kernel, sessionId, viewportWidth, viewportHeight)));

const system: BetaTextBlock = {
type: 'text',
Expand Down
23 changes: 20 additions & 3 deletions pkg/templates/typescript/anthropic-computer-use/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,20 +16,28 @@ export interface SessionOptions {
recordReplay?: boolean;
/** Grace period in seconds before stopping replay */
replayGracePeriod?: number;
/** Viewport width */
viewportWidth?: number;
/** Viewport height */
viewportHeight?: number;
}

export interface SessionInfo {
sessionId: string;
liveViewUrl: string;
replayId?: string;
replayViewUrl?: string;
viewportWidth: number;
viewportHeight: number;
}

const DEFAULT_OPTIONS: Required<SessionOptions> = {
stealth: true,
timeoutSeconds: 300,
recordReplay: false,
replayGracePeriod: 5.0,
viewportWidth: 1280,
viewportHeight: 800,
};

/**
Expand Down Expand Up @@ -76,12 +84,22 @@ export class KernelBrowserSession {
return this._replayViewUrl;
}

get viewportWidth(): number {
return this.options.viewportWidth;
}

get viewportHeight(): number {
return this.options.viewportHeight;
}

get info(): SessionInfo {
return {
sessionId: this.sessionId,
liveViewUrl: this._liveViewUrl || '',
replayId: this._replayId || undefined,
replayViewUrl: this._replayViewUrl || undefined,
viewportWidth: this.options.viewportWidth,
viewportHeight: this.options.viewportHeight,
};
}

Expand All @@ -94,9 +112,8 @@ export class KernelBrowserSession {
stealth: this.options.stealth,
timeout_seconds: this.options.timeoutSeconds,
viewport: {
width: 1024,
height: 768,
refresh_rate: 60,
width: this.options.viewportWidth,
height: this.options.viewportHeight,
},
});

Expand Down
18 changes: 11 additions & 7 deletions pkg/templates/typescript/anthropic-computer-use/tools/computer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export class ComputerTool implements BaseAnthropicTool {
protected sessionId: string;
protected _screenshotDelay = 2.0;
protected version: '20241022' | '20250124';
protected width: number;
protected height: number;

private lastMousePosition: [number, number] = [0, 0];

Expand Down Expand Up @@ -39,10 +41,12 @@ export class ComputerTool implements BaseAnthropicTool {
Action.WAIT,
]);

constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124') {
constructor(kernel: Kernel, sessionId: string, version: '20241022' | '20250124' = '20250124', width = 1280, height = 800) {
this.kernel = kernel;
this.sessionId = sessionId;
this.version = version;
this.width = width;
this.height = height;
}

get apiType(): 'computer_20241022' | 'computer_20250124' {
Expand All @@ -53,8 +57,8 @@ export class ComputerTool implements BaseAnthropicTool {
const params = {
name: this.name,
type: this.apiType,
display_width_px: 1024,
display_height_px: 768,
display_width_px: this.width,
display_height_px: this.height,
display_number: null,
};
return params;
Expand Down Expand Up @@ -380,13 +384,13 @@ export class ComputerTool implements BaseAnthropicTool {

// For backward compatibility
export class ComputerTool20241022 extends ComputerTool {
constructor(kernel: Kernel, sessionId: string) {
super(kernel, sessionId, '20241022');
constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) {
super(kernel, sessionId, '20241022', width, height);
}
}

export class ComputerTool20250124 extends ComputerTool {
constructor(kernel: Kernel, sessionId: string) {
super(kernel, sessionId, '20250124');
constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) {
super(kernel, sessionId, '20250124', width, height);
}
}
4 changes: 2 additions & 2 deletions pkg/templates/typescript/yutori-computer-use/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ When enabled, the response will include a `replay_url` field with a link to view

## Viewport Configuration

Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy. Kernel's closest supported viewport is **1200×800 at 25Hz**, which this template uses by default.
Yutori n1 recommends a **1280×800 (WXGA, 16:10)** viewport for best grounding accuracy, and this template uses that viewport by default.

> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions. The slight width difference (1200 vs 1280) should have minimal impact on accuracy.
> **Note:** n1 outputs coordinates in a 1000×1000 relative space, which are automatically scaled to the actual viewport dimensions.

See [Kernel Viewport Documentation](https://www.kernel.sh/docs/browsers/viewport) for all supported configurations.

Expand Down
7 changes: 3 additions & 4 deletions pkg/templates/typescript/yutori-computer-use/loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ interface SamplingLoopOptions {
cdpWsUrl?: string;
maxTokens?: number;
maxIterations?: number;
/** Viewport width for coordinate scaling (default: 1200, closest to Yutori's 1280 recommendation) */
/** Viewport width for coordinate scaling */
viewportWidth?: number;
/** Viewport height for coordinate scaling (default: 800 per Yutori recommendation) */
/** Viewport height for coordinate scaling */
viewportHeight?: number;
/**
* Browser interaction mode:
Expand All @@ -80,8 +80,7 @@ export async function samplingLoop({
cdpWsUrl,
maxTokens = 4096,
maxIterations = 50,
// Default viewport: 1200x800 (closest Kernel-supported size to Yutori's recommended 1280x800)
viewportWidth = 1200,
viewportWidth = 1280,
viewportHeight = 800,
mode = 'computer_use',
}: SamplingLoopOptions): Promise<SamplingLoopResult> {
Expand Down
7 changes: 3 additions & 4 deletions pkg/templates/typescript/yutori-computer-use/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ export interface SessionOptions {
recordReplay?: boolean;
/** Grace period in seconds before stopping replay */
replayGracePeriod?: number;
/** Viewport width (default: 1200, closest to Yutori's 1280 recommendation) */
/** Viewport width */
viewportWidth?: number;
/** Viewport height (default: 800 per Yutori recommendation) */
/** Viewport height */
viewportHeight?: number;
}

Expand All @@ -37,7 +37,7 @@ const DEFAULT_OPTIONS: Required<SessionOptions> = {
timeoutSeconds: 300,
recordReplay: false,
replayGracePeriod: 5.0,
viewportWidth: 1200,
viewportWidth: 1280,
viewportHeight: 800,
};

Expand Down Expand Up @@ -117,7 +117,6 @@ export class KernelBrowserSession {
viewport: {
width: this.options.viewportWidth,
height: this.options.viewportHeight,
refresh_rate: 25,
},
});

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ export class ComputerTool {
private width: number;
private height: number;

constructor(kernel: Kernel, sessionId: string, width = 1200, height = 800) {
constructor(kernel: Kernel, sessionId: string, width = 1280, height = 800) {
this.kernel = kernel;
this.sessionId = sessionId;
this.width = width;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ export class PlaywrightComputerTool {
private context: BrowserContext | null = null;
private page: Page | null = null;

constructor(cdpWsUrl: string, width = 1200, height = 800) {
constructor(cdpWsUrl: string, width = 1280, height = 800) {
this.cdpWsUrl = cdpWsUrl;
this.width = width;
this.height = height;
Expand Down