Changes from all commits (27 commits)
bd2aa02
feat: wip add computer use
mislavjc Mar 12, 2025
f7d9445
fix: fixed streaming issue
hussufo Mar 12, 2025
48c248f
fix: fixed screenshot display issue
hussufo Mar 12, 2025
d5f03ce
feat: add key mapping and change default url
mislavjc Mar 12, 2025
735159d
feat: add cursor
mislavjc Mar 12, 2025
f39996a
feat: make cancelation work
mislavjc Mar 12, 2025
5193ff3
feat: map agent settings
mislavjc Mar 12, 2025
3fb72e7
feat: add back and forward
mislavjc Mar 13, 2025
30fae82
feat: add goto url
mislavjc Mar 13, 2025
e7e6507
fix: duplicate message issue
mislavjc Mar 13, 2025
00dd9bd
chore: only have one model
mislavjc Mar 13, 2025
19533e2
fix: adjust viewport sizing
mislavjc Mar 13, 2025
3f74520
chore: use bing as start page
mislavjc Mar 13, 2025
f7ca6dd
chore: update cursor
mislavjc Mar 13, 2025
b6f27e2
feat: show system prompt
mislavjc Mar 13, 2025
fa24987
feat: add cot
mislavjc Mar 13, 2025
ba9a4e4
feat: yield full text
mislavjc Mar 13, 2025
17c7883
fix: wait before ss not after
mislavjc Mar 13, 2025
b895e02
feat: make new tabs open in same tab
mislavjc Mar 14, 2025
23f0fc0
fix: switch to google as default
mislavjc Mar 14, 2025
5770f7d
feat: update system prompt
mislavjc Mar 14, 2025
0ac64b6
fix: ollama always checking
mislavjc Mar 17, 2025
dd8aea0
fix: make goto url work
mislavjc Mar 17, 2025
f641ed9
fix: improve handling of new pages
mislavjc Mar 17, 2025
fe6fb6d
feat: refactor agent code
mislavjc Mar 17, 2025
7a84cef
fix: trimmed images error
fukouda Mar 18, 2025
6ee9083
fix: assistant stop + rerendering issue
fukouda Mar 19, 2025
10 changes: 5 additions & 5 deletions README.md
@@ -60,18 +60,18 @@ npm run dev
```

> ### Windows Users
>
> If you're developing on Windows, you should use the Windows-specific command:
>
> ```bash
> npm run dev:win
> ```
>
> **Technical Reason:** Windows has two different asyncio event loop implementations:
>
> - **SelectorEventLoop** (default): Uses select-based I/O and doesn't support subprocesses properly
> - **ProactorEventLoop**: Uses I/O completion ports and fully supports subprocesses
>
> Playwright requires subprocess support to launch browsers. When hot reloading is enabled, the default SelectorEventLoop is used, causing a `NotImplementedError` when Playwright tries to create a subprocess.
> Reference Issue: https://github.com/steel-dev/surf.new/issues/32
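
For reference, a minimal sketch of forcing the subprocess-capable event loop on Windows (not part of this diff; the guard and its placement are assumptions):

```python
import asyncio
import sys

if sys.platform == "win32":
    # ProactorEventLoop supports subprocess creation; the default
    # SelectorEventLoop does not, so Playwright's browser launch would
    # otherwise fail with NotImplementedError under hot reload.
    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
```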

46 changes: 20 additions & 26 deletions api/index.py
@@ -14,6 +14,8 @@
import asyncio
import subprocess
import re
+import time
+import json

# 1) Import the Steel client
try:
@@ -25,7 +27,7 @@
load_dotenv(".env.local")

app = FastAPI()
-app.add_middleware(ProfilingMiddleware) # Uncomment this when profiling is not needed
+app.add_middleware(ProfilingMiddleware)  # Uncomment this when profiling is not needed
STEEL_API_KEY = os.getenv("STEEL_API_KEY")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
STEEL_API_URL = os.getenv("STEEL_API_URL")
@@ -148,8 +150,7 @@ async def on_disconnect():
)

# Use background=on_disconnect to catch client-aborted requests
-    response = StreamingResponse(
-        streaming_response, background=on_disconnect)
+    response = StreamingResponse(streaming_response, background=on_disconnect)
response.headers["x-vercel-ai-data-stream"] = "v1"
# response.headers["model_used"] = request.model_name
return response
@@ -162,8 +163,7 @@ async def on_disconnect():
"code": getattr(e, "code", 500),
}
}
-    raise HTTPException(status_code=getattr(
-        e, "code", 500), detail=error_response)
+    raise HTTPException(status_code=getattr(e, "code", 500), detail=error_response)


@app.get("/api/agents", tags=["Agents"])
@@ -187,17 +187,19 @@ class OllamaModel(BaseModel):
tag: str
base_name: str


class OllamaModelsResponse(BaseModel):
models: List[OllamaModel]


@app.get("/api/ollama/models", response_model=OllamaModelsResponse, tags=["Ollama"])
async def get_ollama_models():
"""
Fetches available models from a local Ollama instance using the 'ollama list' command.

Returns:
A list of model objects with full tags and base names that can be used with Ollama.

Example response:
{
"models": [
@@ -214,37 +216,29 @@ async def get_ollama_models():
"""
try:
result = subprocess.run(
["ollama", "list"],
capture_output=True,
text=True,
check=True
["ollama", "list"], capture_output=True, text=True, check=True
)

models = []
-        lines = result.stdout.strip().split('\n')
+        lines = result.stdout.strip().split("\n")

if lines and "NAME" in lines[0] and "ID" in lines[0]:
lines = lines[1:]

for line in lines:
if line.strip():
-                parts = re.split(r'\s{2,}', line.strip())
+                parts = re.split(r"\s{2,}", line.strip())
if parts and parts[0]:
full_tag = parts[0]
-                    base_name = full_tag.split(':')[0] if ':' in full_tag else full_tag
-                    models.append({
-                        "tag": full_tag,
-                        "base_name": base_name
-                    })
-
+                    base_name = full_tag.split(":")[0] if ":" in full_tag else full_tag
+                    models.append({"tag": full_tag, "base_name": base_name})

return {"models": models}
except subprocess.CalledProcessError as e:
        raise HTTPException(
-            status_code=500,
-            detail=f"Failed to fetch Ollama models: {e.stderr}"
+            status_code=500, detail=f"Failed to fetch Ollama models: {e.stderr}"
)
except Exception as e:
        raise HTTPException(
-            status_code=500,
-            detail=f"Error fetching Ollama models: {str(e)}"
+            status_code=500, detail=f"Error fetching Ollama models: {str(e)}"
)
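
A quick way to exercise the endpoint above, assuming the FastAPI app is served locally (the host and port are assumptions, not part of this diff):

```python
import requests

# GET /api/ollama/models and print each full tag with its base name.
resp = requests.get("http://localhost:8000/api/ollama/models")
resp.raise_for_status()
for model in resp.json()["models"]:
    print(f"{model['tag']} (base: {model['base_name']})")
```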
2 changes: 2 additions & 0 deletions api/models.py
@@ -6,6 +6,7 @@ class ModelProvider(str, Enum):
OPENAI = "openai"
ANTHROPIC = "anthropic"
ANTHROPIC_COMPUTER_USE = "anthropic_computer_use"
OPENAI_COMPUTER_USE = "openai_computer_use"
GEMINI = "gemini"
DEEPSEEK = "deepseek"
OLLAMA = "ollama"
@@ -79,6 +80,7 @@ def default_model(provider: ModelProvider) -> str:
ModelProvider.OPENAI: "gpt-4o-mini",
ModelProvider.ANTHROPIC: "claude-3-7-sonnet-latest",
ModelProvider.ANTHROPIC_COMPUTER_USE: "claude-3-5-sonnet-20241022",
ModelProvider.OPENAI_COMPUTER_USE: "computer-use-preview",
ModelProvider.GEMINI: "gemini-2.0-flash",
ModelProvider.DEEPSEEK: "deepseek-chat",
ModelProvider.OLLAMA: "llama3.3",
66 changes: 63 additions & 3 deletions api/plugins/__init__.py
@@ -13,8 +13,10 @@
from .base import base_agent
from .claude_computer_use import claude_computer_use
from .browser_use import browser_use_agent
+from .openai_computer_use import openai_computer_use_agent
from ..utils.types import AgentSettings
-from .claude_computer_use.prompts import SYSTEM_PROMPT
+from .claude_computer_use.prompts import SYSTEM_PROMPT as CLAUDE_SYSTEM_PROMPT
+from .openai_computer_use.prompts import SYSTEM_PROMPT as OPENAI_SYSTEM_PROMPT

# from .example_plugin import example_agent

@@ -24,6 +26,7 @@ class WebAgentType(Enum):
EXAMPLE = "example"
CLAUDE_COMPUTER_USE = "claude_computer_use"
BROWSER_USE = "browser_use_agent"
OPENAI_COMPUTER_USE = "openai_computer_use_agent"


class SettingType(Enum):
@@ -197,7 +200,7 @@ class SettingConfig(TypedDict):
"agent_settings": {
"system_prompt": {
"type": SettingType.TEXTAREA.value,
"default": SYSTEM_PROMPT,
"default": CLAUDE_SYSTEM_PROMPT,
"maxLength": 4000,
"description": "System prompt for the agent",
},
@@ -217,7 +220,62 @@ class SettingConfig(TypedDict):
},
},
},

+    WebAgentType.OPENAI_COMPUTER_USE.value: {
+        "name": "OpenAI Computer Use",
+        "description": "Agent that uses OpenAI's Computer-Using Agent (CUA) via the /v1/responses API",
+        "supported_models": [
+            {
+                "provider": ModelProvider.OPENAI_COMPUTER_USE.value,
+                "models": ["computer-use-preview"],
+            }
+        ],
+        "model_settings": {
+            "max_tokens": {
+                "type": SettingType.INTEGER.value,
+                "default": 3000,
+                "min": 1,
+                "max": 4096,
+                "description": "Maximum tokens for the responses endpoint",
+            },
+            "temperature": {
+                "type": SettingType.FLOAT.value,
+                "default": 0.2,
+                "min": 0,
+                "max": 1,
+                "step": 0.05,
+                "description": "Optional temperature param for final assistant messages",
+            },
+        },
+        "agent_settings": {
+            "system_prompt": {
+                "type": SettingType.TEXTAREA.value,
+                "default": OPENAI_SYSTEM_PROMPT,
+                "maxLength": 4000,
+                "description": "Custom system prompt for the agent",
+            },
+            "num_images_to_keep": {
+                "type": SettingType.INTEGER.value,
+                "default": 10,
+                "min": 1,
+                "max": 50,
+                "description": "Number of images to keep in memory",
+            },
+            "wait_time_between_steps": {
+                "type": SettingType.INTEGER.value,
+                "default": 1,
+                "min": 0,
+                "max": 10,
+                "description": "Wait time between steps in seconds",
+            },
+            "max_steps": {
+                "type": SettingType.INTEGER.value,
+                "default": 30,
+                "min": 10,
+                "max": 50,
+                "description": "Maximum number of steps the agent can take",
+            }
+        },
+    },
}
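
For context, a hedged sketch of how these settings could feed OpenAI's computer-use tool in the Responses API (field names follow OpenAI's published computer-use guide and may change; none of this code is part of the PR):

```python
from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

response = client.responses.create(
    model="computer-use-preview",
    tools=[{
        "type": "computer_use_preview",
        "display_width": 1024,   # would come from the session's viewport settings
        "display_height": 768,
        "environment": "browser",
    }],
    input=[{"role": "user", "content": "Open the Steel docs and summarize the intro."}],
    truncation="auto",  # required when the computer-use tool is attached
)
print(response.output)
```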


@@ -232,6 +290,8 @@ def get_web_agent(
return claude_computer_use
elif name == WebAgentType.BROWSER_USE:
return browser_use_agent
+    elif name == WebAgentType.OPENAI_COMPUTER_USE:
+        return openai_computer_use_agent
else:
raise ValueError(f"Invalid agent type: {name}")

3 changes: 3 additions & 0 deletions api/plugins/openai_computer_use/__init__.py
@@ -0,0 +1,3 @@
+from .agent import openai_computer_use_agent
+
+__all__ = ["openai_computer_use_agent"]