|
| 1 | +--- |
| 2 | +title: "Computer Controls" |
| 3 | +description: "Control the computer's mouse, keyboard, and screen" |
| 4 | +--- |
| 5 | + |
| 6 | +Use OS-level controls to move and click the mouse, type and press keys, scroll, drag, and capture screenshots from a running browser session. |
| 7 | + |
| 8 | +## Click the mouse |
| 9 | + |
| 10 | +Simulate mouse clicks at specific coordinates. You can choose the mouse button (`button`), the click type (`click_type`: down, up, or click), the number of clicks (`num_clicks`), and optional modifier keys to hold during the click (`hold_keys`). |
| 11 | + |
| 12 | +<CodeGroup> |
| 13 | +```typescript Typescript/Javascript |
| 14 | +import { Kernel } from '@onkernel/sdk'; |
| 15 | + |
| 16 | +const kernel = new Kernel(); |
| 17 | +const kernelBrowser = await kernel.browsers.create(); |
| 18 | + |
| 19 | +// Basic left click at (100, 200) |
| 20 | +await kernel.browsers.computer.clickMouse(kernelBrowser.session_id, { |
| 21 | + x: 100, |
| 22 | + y: 200, |
| 23 | +}); |
| 24 | + |
| 25 | +// Double right-click while holding Shift |
| 26 | +await kernel.browsers.computer.clickMouse(kernelBrowser.session_id, { |
| 27 | + x: 100, |
| 28 | + y: 200, |
| 29 | + button: 'right', |
| 30 | + click_type: 'click', |
| 31 | + num_clicks: 2, |
| 32 | + hold_keys: ['Shift'], |
| 33 | +}); |
| 34 | +``` |
| 35 | + |
| 36 | +```python Python |
| 37 | +import kernel |
| 38 | + |
| 39 | +client = kernel.Kernel() |
| 40 | +kernel_browser = client.browsers.create() |
| 41 | + |
| 42 | +# Basic left click at (100, 200) |
| 43 | +client.browsers.computer.click_mouse( |
| 44 | + id=kernel_browser.session_id, |
| 45 | + x=100, |
| 46 | + y=200, |
| 47 | +) |
| 48 | + |
| 49 | +# Double right-click while holding Shift |
| 50 | +client.browsers.computer.click_mouse( |
| 51 | + id=kernel_browser.session_id, |
| 52 | + x=100, |
| 53 | + y=200, |
| 54 | + button="right", |
| 55 | + click_type="click", |
| 56 | + num_clicks=2, |
| 57 | + hold_keys=["Shift"], |
| 58 | +) |
| 59 | +``` |
| 60 | + |
| 61 | +```bash CLI |
| 62 | +# Click the mouse at coordinates (100, 200) |
| 63 | +kernel browsers computer click-mouse <session id> --x 100 --y 200 |
| 64 | + |
| 65 | +# Double-click the right mouse button |
| 66 | +kernel browsers computer click-mouse <session id> --x 100 --y 200 --num-clicks 2 --button right |
| 67 | +``` |
| 68 | +</CodeGroup> |
| 69 | + |
| 70 | +## Move the mouse |
| 71 | + |
| 72 | +Move the cursor to specific screen coordinates. Optionally hold modifier keys during the move. |
| 73 | + |
| 74 | +<CodeGroup> |
| 75 | +```typescript Typescript/Javascript |
| 76 | +import { Kernel } from '@onkernel/sdk'; |
| 77 | + |
| 78 | +const kernel = new Kernel(); |
| 79 | +const kernelBrowser = await kernel.browsers.create(); |
| 80 | + |
| 81 | +await kernel.browsers.computer.moveMouse(kernelBrowser.session_id, { |
| 82 | + x: 500, |
| 83 | + y: 300, |
| 84 | + hold_keys: ['Alt'], |
| 85 | +}); |
| 86 | +``` |
| 87 | + |
| 88 | +```python Python |
| 89 | +import kernel |
| 90 | + |
| 91 | +client = kernel.Kernel() |
| 92 | +kernel_browser = client.browsers.create() |
| 93 | + |
| 94 | +client.browsers.computer.move_mouse( |
| 95 | + id=kernel_browser.session_id, |
| 96 | + x=500, |
| 97 | + y=300, |
| 98 | + hold_keys=["Alt"], |
| 99 | +) |
| 100 | +``` |
| 101 | + |
| 102 | +```bash CLI |
| 103 | +# Move the mouse to coordinates (500, 300) |
| 104 | +kernel browsers computer move-mouse <session id> --x 500 --y 300 |
| 105 | +``` |
| 106 | +</CodeGroup> |
| 107 | + |
| 108 | +## Take screenshots |
| 109 | + |
| 110 | +Capture a PNG screenshot of the full screen, or of a specific region by passing its `x`, `y`, `width`, and `height`. |
| 111 | + |
| 112 | +<CodeGroup> |
| 113 | +```typescript Typescript/Javascript |
| 114 | +import fs from 'fs'; |
| 115 | +import { Buffer } from 'buffer'; |
| 116 | +import { Kernel } from '@onkernel/sdk'; |
| 117 | + |
| 118 | +const kernel = new Kernel(); |
| 119 | +const kernelBrowser = await kernel.browsers.create(); |
| 120 | + |
| 121 | +// Full screenshot |
| 122 | +{ |
| 123 | + const response = await kernel.browsers.computer.captureScreenshot(kernelBrowser.session_id); |
| 124 | + const blob = await response.blob(); |
| 125 | + const buffer = Buffer.from(await blob.arrayBuffer()); |
| 126 | + fs.writeFileSync('screenshot.png', buffer); |
| 127 | +} |
| 128 | + |
| 129 | +// Region screenshot |
| 130 | +{ |
| 131 | + const response = await kernel.browsers.computer.captureScreenshot(kernelBrowser.session_id, { |
| 132 | + region: { x: 0, y: 0, width: 800, height: 600 }, |
| 133 | + }); |
| 134 | + const blob = await response.blob(); |
| 135 | + const buffer = Buffer.from(await blob.arrayBuffer()); |
| 136 | + fs.writeFileSync('region.png', buffer); |
| 137 | +} |
| 138 | +``` |
| 139 | + |
| 140 | +```python Python |
| 141 | +import kernel |
| 142 | + |
| 143 | +client = kernel.Kernel() |
| 144 | +kernel_browser = client.browsers.create() |
| 145 | + |
| 146 | +# Full screenshot |
| 147 | +with open('screenshot.png', 'wb') as f: |
| 148 | + image_data = client.browsers.computer.capture_screenshot(id=kernel_browser.session_id) |
| 149 | + f.write(image_data.read()) |
| 150 | + |
| 151 | +# Region screenshot |
| 152 | +with open('region.png', 'wb') as f: |
| 153 | + image_data = client.browsers.computer.capture_screenshot( |
| 154 | + id=kernel_browser.session_id, |
| 155 | + region={"x": 0, "y": 0, "width": 800, "height": 600}, |
| 156 | + ) |
| 157 | + f.write(image_data.read()) |
| 158 | +``` |
| 159 | + |
| 160 | +```bash CLI |
| 161 | +# Take a full screenshot |
| 162 | +kernel browsers computer screenshot <session id> --to screenshot.png |
| 163 | + |
| 164 | +# Take a screenshot of a specific region |
| 165 | +kernel browsers computer screenshot <session id> --to region.png --x 0 --y 0 --width 800 --height 600 |
| 166 | +``` |
| 167 | +</CodeGroup> |
| 168 | + |
| 169 | +## Type text |
| 170 | + |
| 171 | +Type literal text, optionally with a delay in milliseconds between keystrokes. |
| 172 | + |
| 173 | +<CodeGroup> |
| 174 | +```typescript Typescript/Javascript |
| 175 | +import { Kernel } from '@onkernel/sdk'; |
| 176 | + |
| 177 | +const kernel = new Kernel(); |
| 178 | +const kernelBrowser = await kernel.browsers.create(); |
| 179 | + |
| 180 | +await kernel.browsers.computer.typeText(kernelBrowser.session_id, { |
| 181 | + text: 'Hello, World!', |
| 182 | +}); |
| 183 | + |
| 184 | +await kernel.browsers.computer.typeText(kernelBrowser.session_id, { |
| 185 | + text: 'Slow typing...', |
| 186 | + delay: 100, |
| 187 | +}); |
| 188 | +``` |
| 189 | + |
| 190 | +```python Python |
| 191 | +import kernel |
| 192 | + |
| 193 | +client = kernel.Kernel() |
| 194 | +kernel_browser = client.browsers.create() |
| 195 | + |
| 196 | +client.browsers.computer.type_text( |
| 197 | + id=kernel_browser.session_id, |
| 198 | + text="Hello, World!", |
| 199 | +) |
| 200 | + |
| 201 | +client.browsers.computer.type_text( |
| 202 | + id=kernel_browser.session_id, |
| 203 | + text="Slow typing...", |
| 204 | + delay=100, |
| 205 | +) |
| 206 | +``` |
| 207 | + |
| 208 | +```bash CLI |
| 209 | +# Type text in the browser |
| 210 | +kernel browsers computer type <session id> --text "Hello, World!" |
| 211 | + |
| 212 | +# Type text with a 100ms delay between keystrokes |
| 213 | +kernel browsers computer type <session id> --text "Slow typing..." --delay 100 |
| 214 | +``` |
| 215 | +</CodeGroup> |
| 216 | + |
| 217 | +## Press keys |
| 218 | + |
| 219 | +Press one or more key symbols (including combinations like "Ctrl+t" or "Ctrl+Shift+Tab"). Optionally hold additional modifier keys (`hold_keys`) and/or set a `duration`, in milliseconds, for how long the keys are held down. |
| 220 | + |
| 221 | +<CodeGroup> |
| 222 | +```typescript Typescript/Javascript |
| 223 | +import { Kernel } from '@onkernel/sdk'; |
| 224 | + |
| 225 | +const kernel = new Kernel(); |
| 226 | +const kernelBrowser = await kernel.browsers.create(); |
| 227 | + |
| 228 | +// Tap a key combination |
| 229 | +await kernel.browsers.computer.pressKey(kernelBrowser.session_id, { |
| 230 | + keys: ['Ctrl+t'], |
| 231 | +}); |
| 232 | + |
| 233 | +// Hold keys for 250ms while also holding Alt |
| 234 | +await kernel.browsers.computer.pressKey(kernelBrowser.session_id, { |
| 235 | + keys: ['Ctrl+Shift+Tab'], |
| 236 | + duration: 250, |
| 237 | + hold_keys: ['Alt'], |
| 238 | +}); |
| 239 | +``` |
| 240 | + |
| 241 | +```python Python |
| 242 | +import kernel |
| 243 | + |
| 244 | +client = kernel.Kernel() |
| 245 | +kernel_browser = client.browsers.create() |
| 246 | + |
| 247 | +# Tap a key combination |
| 248 | +client.browsers.computer.press_key( |
| 249 | + id=kernel_browser.session_id, |
| 250 | + keys=["Ctrl+t"], |
| 251 | +) |
| 252 | + |
| 253 | +# Hold keys for 250ms while also holding Alt |
| 254 | +client.browsers.computer.press_key( |
| 255 | + id=kernel_browser.session_id, |
| 256 | + keys=["Ctrl+Shift+Tab"], |
| 257 | + duration=250, |
| 258 | + hold_keys=["Alt"], |
| 259 | +) |
| 260 | +``` |
| 261 | + |
| 262 | +```bash CLI |
| 263 | +# Press one or more keys (repeatable --key) |
| 264 | +kernel browsers computer press-key <session id> --key Ctrl+t |
| 265 | + |
| 266 | +# Hold for a duration and add optional modifiers |
| 267 | +kernel browsers computer press-key <session id> --key Ctrl+Shift+Tab --duration 250 --hold-key Alt |
| 268 | +``` |
| 269 | +</CodeGroup> |
| 270 | + |
| 271 | +## Scroll |
| 272 | + |
| 273 | +Scroll the mouse wheel at a specific position. Positive `delta_y` scrolls down; negative scrolls up. Positive `delta_x` scrolls right; negative scrolls left. |
| 274 | + |
| 275 | +<CodeGroup> |
| 276 | +```typescript Typescript/Javascript |
| 277 | +import { Kernel } from '@onkernel/sdk'; |
| 278 | + |
| 279 | +const kernel = new Kernel(); |
| 280 | +const kernelBrowser = await kernel.browsers.create(); |
| 281 | + |
| 282 | +await kernel.browsers.computer.scroll(kernelBrowser.session_id, { |
| 283 | + x: 300, |
| 284 | + y: 400, |
| 285 | + delta_x: 0, |
| 286 | + delta_y: 120, |
| 287 | +}); |
| 288 | +``` |
| 289 | + |
| 290 | +```python Python |
| 291 | +import kernel |
| 292 | + |
| 293 | +client = kernel.Kernel() |
| 294 | +kernel_browser = client.browsers.create() |
| 295 | + |
| 296 | +client.browsers.computer.scroll( |
| 297 | + id=kernel_browser.session_id, |
| 298 | + x=300, |
| 299 | + y=400, |
| 300 | + delta_x=0, |
| 301 | + delta_y=120, |
| 302 | +) |
| 303 | +``` |
| 304 | + |
| 305 | +```bash CLI |
| 306 | +# Scroll at a position |
| 307 | +kernel browsers computer scroll <session id> --x 300 --y 400 --delta-y 120 |
| 308 | +``` |
| 309 | +</CodeGroup> |
| 310 | + |
| 311 | +## Drag the mouse |
| 312 | + |
| 313 | +Drag by pressing a button, moving along a path of points, then releasing. You can choose the `button`, set a `delay` before the drag starts, control the granularity and speed of the drag via `steps_per_segment` and `step_delay_ms`, and optionally hold modifier keys with `hold_keys`. |
| 314 | + |
| 315 | +<CodeGroup> |
| 316 | +```typescript Typescript/Javascript |
| 317 | +import { Kernel } from '@onkernel/sdk'; |
| 318 | + |
| 319 | +const kernel = new Kernel(); |
| 320 | +const kernelBrowser = await kernel.browsers.create(); |
| 321 | + |
| 322 | +await kernel.browsers.computer.dragMouse(kernelBrowser.session_id, { |
| 323 | + path: [ |
| 324 | + [100, 200], |
| 325 | + [150, 220], |
| 326 | + [200, 260], |
| 327 | + ], |
| 328 | + button: 'left', |
| 329 | + delay: 0, |
| 330 | + steps_per_segment: 10, |
| 331 | + step_delay_ms: 50, |
| 332 | + hold_keys: ['Shift'], |
| 333 | +}); |
| 334 | +``` |
| 335 | + |
| 336 | +```python Python |
| 337 | +import kernel |
| 338 | + |
| 339 | +client = kernel.Kernel() |
| 340 | +kernel_browser = client.browsers.create() |
| 341 | + |
| 342 | +client.browsers.computer.drag_mouse( |
| 343 | + id=kernel_browser.session_id, |
| 344 | + path=[[100, 200], [150, 220], [200, 260]], |
| 345 | + button="left", |
| 346 | + delay=0, |
| 347 | + steps_per_segment=10, |
| 348 | + step_delay_ms=50, |
| 349 | + hold_keys=["Shift"], |
| 350 | +) |
| 351 | +``` |
| 352 | + |
| 353 | +```bash CLI |
| 354 | +# Drag the mouse along a path |
| 355 | +kernel browsers computer drag-mouse <session id> \ |
| 356 | + --point 100,200 \ |
| 357 | + --point 150,220 \ |
| 358 | + --point 200,260 \ |
| 359 | + --button left \ |
| 360 | + --delay 0 |
| 361 | +``` |
| 362 | +</CodeGroup> |
0 commit comments