Changes from all commits (27 commits)
bd2aa02
feat: wip add computer use
mislavjc Mar 12, 2025
f7d9445
fix: fixed streaming issue
hussufo Mar 12, 2025
48c248f
fix: fixed screenshot display issue
hussufo Mar 12, 2025
d5f03ce
feat: add key mapping and change default url
mislavjc Mar 12, 2025
735159d
feat: add cursor
mislavjc Mar 12, 2025
f39996a
feat: make cancelation work
mislavjc Mar 12, 2025
5193ff3
feat: map agent settings
mislavjc Mar 12, 2025
3fb72e7
feat: add back and forward
mislavjc Mar 13, 2025
30fae82
feat: add goto url
mislavjc Mar 13, 2025
e7e6507
fix: duplicate message issue
mislavjc Mar 13, 2025
00dd9bd
chore: only have one model
mislavjc Mar 13, 2025
19533e2
fix: adjust viewport sizing
mislavjc Mar 13, 2025
3f74520
chore: use bing as start page
mislavjc Mar 13, 2025
f7ca6dd
chore: update cursor
mislavjc Mar 13, 2025
b6f27e2
feat: show system prompt
mislavjc Mar 13, 2025
fa24987
feat: add cot
mislavjc Mar 13, 2025
ba9a4e4
feat: yield full text
mislavjc Mar 13, 2025
17c7883
fix: wait before ss not after
mislavjc Mar 13, 2025
b895e02
feat: make new tabs open in same tab
mislavjc Mar 14, 2025
23f0fc0
fix: switch to google as default
mislavjc Mar 14, 2025
5770f7d
feat: update system prompt
mislavjc Mar 14, 2025
0ac64b6
fix: ollama always checking
mislavjc Mar 17, 2025
dd8aea0
fix: make goto url work
mislavjc Mar 17, 2025
f641ed9
fix: improve handling of new pages
mislavjc Mar 17, 2025
fe6fb6d
feat: refactor agent code
mislavjc Mar 17, 2025
7a84cef
fix: trimmed images error
fukouda Mar 18, 2025
6ee9083
fix: assistant stop + rerendering issue
fukouda Mar 19, 2025
10 changes: 5 additions & 5 deletions README.md
@@ -60,18 +60,18 @@ npm run dev
```

> ### Windows Users
>
> If you're developing on Windows, you should use the Windows-specific command:
>
> ```bash
> npm run dev:win
> ```
>
> **Technical Reason:** Windows has two different asyncio event loop implementations:
>
> - **SelectorEventLoop** (default): Uses select-based I/O and doesn't support subprocesses properly
> - **ProactorEventLoop**: Uses I/O completion ports and fully supports subprocesses
>
> Playwright requires subprocess support to launch browsers. When hot reloading is enabled, the default SelectorEventLoop is used, causing a `NotImplementedError` when Playwright tries to create a subprocess.
> Reference Issue: https://github.com/steel-dev/surf.new/issues/32
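
For reference, a minimal sketch of forcing the subprocess-capable event loop on Windows (not part of this diff; the guard and its placement are assumptions):

```python
import asyncio
import sys

if sys.platform == "win32":
    # ProactorEventLoop supports subprocess creation; the default
    # SelectorEventLoop does not, so Playwright's browser launch would
    # otherwise fail with NotImplementedError under hot reload.
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
```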

46 changes: 20 additions & 26 deletions api/index.py
@@ -14,6 +14,8 @@
import asyncio
import subprocess
import re
+import time
+import json

# 1) Import the Steel client
try:
@@ -25,7 +27,7 @@
load_dotenv(".env.local")

app = FastAPI()
-app.add_middleware(ProfilingMiddleware) # Uncomment this when profiling is not needed
+app.add_middleware(ProfilingMiddleware)  # Uncomment this when profiling is not needed
STEEL_API_KEY = os.getenv("STEEL_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
STEEL_API_URL = os.getenv("STEEL_API_URL")
@@ -148,8 +150,7 @@ async def on_disconnect():
)

# Use background=on_disconnect to catch client-aborted requests
-    response = StreamingResponse(
-        streaming_response, background=on_disconnect)
+    response = StreamingResponse(streaming_response, background=on_disconnect)
response.headers["x-vercel-ai-data-stream"] = "v1"
# response.headers["model_used"] = request.model_name
return response
@@ -162,8 +163,7 @@ async def on_disconnect():
"code": getattr(e, "code", 500),
}
}
-    raise HTTPException(status_code=getattr(
-        e, "code", 500), detail=error_response)
+    raise HTTPException(status_code=getattr(e, "code", 500), detail=error_response)


@app.get("/api/agents", tags=["Agents"])
@@ -187,17 +187,19 @@ class OllamaModel(BaseModel):
tag: str
base_name: str


class OllamaModelsResponse(BaseModel):
models: List[OllamaModel]


@app.get("/api/ollama/models", response_model=OllamaModelsResponse, tags=["Ollama"])
async def get_ollama_models():
"""
Fetches available models from a local Ollama instance using the 'ollama list' command.

Returns:
A list of model objects with full tags and base names that can be used with Ollama.

Example response:
{
"models": [
@@ -214,37 +216,29 @@ async def get_ollama_models():
"""
try:
result = subprocess.run(
["ollama", "list"],
capture_output=True,
text=True,
check=True
["ollama", "list"], capture_output=True, text=True, check=True
)

models = []
-        lines = result.stdout.strip().split('\n')
+        lines = result.stdout.strip().split("\n")

if lines and "NAME" in lines[0] and "ID" in lines[0]:
lines = lines[1:]

for line in lines:
if line.strip():
-                parts = re.split(r'\s{2,}', line.strip())
+                parts = re.split(r"\s{2,}", line.strip())
if parts and parts[0]:
full_tag = parts[0]
-                    base_name = full_tag.split(':')[0] if ':' in full_tag else full_tag
-                    models.append({
-                        "tag": full_tag,
-                        "base_name": base_name
-                    })
-
+                    base_name = full_tag.split(":")[0] if ":" in full_tag else full_tag
+                    models.append({"tag": full_tag, "base_name": base_name})

return {"models": models}
except subprocess.CalledProcessError as e:
        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to fetch Ollama models: {e.stderr}"
+            status_code=500, detail=f"Failed to fetch Ollama models: {e.stderr}"
)
except Exception as e:
        raise HTTPException(
-            status_code=500,
-            detail=f"Error fetching Ollama models: {str(e)}"
+            status_code=500, detail=f"Error fetching Ollama models: {str(e)}"
)
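
A quick way to exercise the endpoint above, assuming the FastAPI app is served locally (the host and port are assumptions, not part of this diff):

```python
import requests

# GET /api/ollama/models and print each full tag with its base name.
resp = requests.get("http://localhost:8000/api/ollama/models")
resp.raise_for_status()
for model in resp.json()["models"]:
    print(f"{model['tag']} (base: {model['base_name']})")
```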
2 changes: 2 additions & 0 deletions api/models.py
@@ -6,6 +6,7 @@ class ModelProvider(str, Enum):
OPENAI = "openai"
ANTHROPIC = "anthropic"
ANTHROPIC_COMPUTER_USE = "anthropic_computer_use"
OPENAI_COMPUTER_USE = "openai_computer_use"
GEMINI = "gemini"
DEEPSEEK = "deepseek"
OLLAMA = "ollama"
@@ -79,6 +80,7 @@ def default_model(provider: ModelProvider) -> str:
ModelProvider.OPENAI: "gpt-4o-mini",
ModelProvider.ANTHROPIC: "claude-3-7-sonnet-latest",
ModelProvider.ANTHROPIC_COMPUTER_USE: "claude-3-5-sonnet-20241022",
ModelProvider.OPENAI_COMPUTER_USE: "computer-use-preview",
ModelProvider.GEMINI: "gemini-2.0-flash",
ModelProvider.DEEPSEEK: "deepseek-chat",
ModelProvider.OLLAMA: "llama3.3",
66 changes: 63 additions & 3 deletions api/plugins/__init__.py
@@ -13,8 +13,10 @@
from .base import base_agent
from .claude_computer_use import claude_computer_use
from .browser_use import browser_use_agent
+from .openai_computer_use import openai_computer_use_agent
from ..utils.types import AgentSettings
-from .claude_computer_use.prompts import SYSTEM_PROMPT
+from .claude_computer_use.prompts import SYSTEM_PROMPT as CLAUDE_SYSTEM_PROMPT
+from .openai_computer_use.prompts import SYSTEM_PROMPT as OPENAI_SYSTEM_PROMPT

# from .example_plugin import example_agent

@@ -24,6 +26,7 @@ class WebAgentType(Enum):
EXAMPLE = "example"
CLAUDE_COMPUTER_USE = "claude_computer_use"
BROWSER_USE = "browser_use_agent"
OPENAI_COMPUTER_USE = "openai_computer_use_agent"


class SettingType(Enum):
@@ -197,7 +200,7 @@ class SettingConfig(TypedDict):
"agent_settings": {
"system_prompt": {
"type": SettingType.TEXTAREA.value,
"default": SYSTEM_PROMPT,
"default": CLAUDE_SYSTEM_PROMPT,
"maxLength": 4000,
"description": "System prompt for the agent",
},
@@ -217,7 +220,62 @@ class SettingConfig(TypedDict):
},
},
},

+    WebAgentType.OPENAI_COMPUTER_USE.value: {
+        "name": "OpenAI Computer Use",
+        "description": "Agent that uses OpenAI's Computer-Using Agent (CUA) via the /v1/responses API",
+        "supported_models": [
+            {
+                "provider": ModelProvider.OPENAI_COMPUTER_USE.value,
+                "models": ["computer-use-preview"],
+            }
+        ],
+        "model_settings": {
+            "max_tokens": {
+                "type": SettingType.INTEGER.value,
+                "default": 3000,
+                "min": 1,
+                "max": 4096,
+                "description": "Maximum tokens for the responses endpoint",
+            },
+            "temperature": {
+                "type": SettingType.FLOAT.value,
+                "default": 0.2,
+                "min": 0,
+                "max": 1,
+                "step": 0.05,
+                "description": "Optional temperature param for final assistant messages",
+            },
+        },
+        "agent_settings": {
+            "system_prompt": {
+                "type": SettingType.TEXTAREA.value,
+                "default": OPENAI_SYSTEM_PROMPT,
+                "maxLength": 4000,
+                "description": "Custom system prompt for the agent",
+            },
+            "num_images_to_keep": {
+                "type": SettingType.INTEGER.value,
+                "default": 10,
+                "min": 1,
+                "max": 50,
+                "description": "Number of images to keep in memory",
+            },
+            "wait_time_between_steps": {
+                "type": SettingType.INTEGER.value,
+                "default": 1,
+                "min": 0,
+                "max": 10,
+                "description": "Wait time between steps in seconds",
+            },
+            "max_steps": {
+                "type": SettingType.INTEGER.value,
+                "default": 30,
+                "min": 10,
+                "max": 50,
+                "description": "Maximum number of steps the agent can take",
+            }
+        },
+    },
}
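
For context, a hedged sketch of how these settings could feed OpenAI's computer-use tool in the Responses API (field names follow OpenAI's published computer-use guide and may change; none of this code is part of the PR):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

response = client.responses.create(
    model="computer-use-preview",
    tools=[{
        "type": "computer_use_preview",
        "display_width": 1024,   # would come from the session's viewport settings
        "display_height": 768,
        "environment": "browser",
    }],
    input=[{"role": "user", "content": "Open the Steel docs and summarize the intro."}],
    truncation="auto",  # required when the computer-use tool is attached
)
print(response.output)
```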


@@ -232,6 +290,8 @@ def get_web_agent(
return claude_computer_use
elif name == WebAgentType.BROWSER_USE:
return browser_use_agent
+    elif name == WebAgentType.OPENAI_COMPUTER_USE:
+        return openai_computer_use_agent
else:
raise ValueError(f"Invalid agent type: {name}")

3 changes: 3 additions & 0 deletions api/plugins/openai_computer_use/__init__.py
@@ -0,0 +1,3 @@
+from .agent import openai_computer_use_agent
+
+__all__ = ["openai_computer_use_agent"]