BrowserOperator · tysonthomas9 · Feb 3, 2026 · Dec 26, 2025 · Dec 27, 2025 · Dec 30, 2025
diff --git a/agent-server/nodejs/src/api-server.js b/agent-server/nodejs/src/api-server.js
diff --git a/agent-server/nodejs/src/lib/BrowserAgentServer.js b/agent-server/nodejs/src/lib/BrowserAgentServer.js
@@ -1418,6 +1418,55 @@ export class BrowserAgentServer extends EventEmitter {
     }
   }
 
+  /**
+   * Execute a tool directly on a connected DevTools client.
+   *
+   * Bypasses LLM orchestration: issues a single `execute_tool` RPC over the
+   * connection's WebSocket and hands the RPC response back to the caller.
+   *
+   * @param {Object} connection - DevTools WebSocket connection (reads `clientId`, `rpcClient`, `ws`)
+   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
+   * @param {Object} args - Tool-specific arguments
+   * @param {number} timeout - Execution timeout in milliseconds
+   * @returns {Promise<Object>} Tool execution result
+   * @throws {Error} Any error raised by the RPC call is logged and re-thrown
+   */
+  async executeToolDirect(connection, tool, args, timeout = 30000) {
+    logger.info('Executing tool directly', {
+      clientId: connection.clientId,
+      tool,
+      timeout
+    });
+
+    try {
+      // The tool timeout is forwarded in the params; the RPC call itself is
+      // given an extra 5000 ms on top of it.
+      const response = await connection.rpcClient.callMethod(
+        connection.ws,
+        'execute_tool',
+        {
+          tool,
+          args,
+          timeout
+        },
+        timeout + 5000 // Add buffer for network overhead
+      );
+
+      logger.info('Tool execution completed', {
+        clientId: connection.clientId,
+        tool,
+        success: response?.result?.success
+      });
+
+      return response;
+
+    } catch (error) {
+      logger.error('Tool execution failed', {
+        clientId: connection.clientId,
+        tool,
+        error: error.message
+      });
+      throw error;
+    }
+  }
-  /**
-   * Execute a tool directly on a connected DevTools client
-   * This bypasses LLM orchestration and calls the tool directly
-   * @param {Object} connection - DevTools WebSocket connection
-   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
-   * @param {Object} args - Tool-specific arguments
-   * @param {number} timeout - Execution timeout in milliseconds
-   * @returns {Promise<Object>} Tool execution result
-   */
-  async executeToolDirect(connection, tool, args, timeout = 30000) {
-    const rpcId = `tool-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
-
-    logger.info('Executing tool directly', {
-      clientId: connection.clientId,
-      tool,
-      timeout
-    });
-
-    try {
-      // Prepare RPC request for execute_tool method
-      const response = await connection.rpcClient.callMethod(
-        connection.ws,
-        'execute_tool',
-        {
-          tool,
-          args,
-          timeout
-        },
-        timeout + 5000 // Add buffer for network overhead
-      );
-
-      logger.info('Tool execution completed', {
-        clientId: connection.clientId,
-        tool,
-        success: response?.result?.success
-      });
-
-      return response;
-
-    } catch (error) {
-      logger.error('Tool execution failed', {
-        clientId: connection.clientId,
-        tool,
-        error: error.message
-      });
-      throw error;
-    }
-  }
+  /**
+   * Execute a tool directly on a connected DevTools client.
+   *
+   * Bypasses LLM orchestration: issues a single `execute_tool` RPC over the
+   * connection's WebSocket and hands the RPC response back to the caller.
+   *
+   * @param {Object} connection - DevTools WebSocket connection (reads `ready`, `clientId`, `rpcClient`, `ws`)
+   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
+   * @param {Object} args - Tool-specific arguments
+   * @param {number} timeout - Execution timeout in milliseconds
+   * @returns {Promise<Object>} Tool execution result
+   * @throws {Error} If the connection is not ready, or if the RPC call fails
+   *   (RPC errors are logged and re-thrown)
+   */
+  async executeToolDirect(connection, tool, args, timeout = 30000) {
+    // Fail fast instead of sending an RPC over a connection that is not ready.
+    if (!connection.ready) {
+      throw new Error(`Connection for client ${connection.clientId} is not ready for tool execution`);
+    }
+
+    logger.info('Executing tool directly', {
+      clientId: connection.clientId,
+      tool,
+      timeout
+    });
+
+    try {
+      // The tool timeout is forwarded in the params; the RPC call itself is
+      // given an extra 5000 ms on top of it.
+      const response = await connection.rpcClient.callMethod(
+        connection.ws,
+        'execute_tool',
+        {
+          tool,
+          args,
+          timeout
+        },
+        timeout + 5000 // Add buffer for network overhead
+      );
+
+      logger.info('Tool execution completed', {
+        clientId: connection.clientId,
+        tool,
+        success: response?.result?.success
+      });
+
+      return response;
+
+    } catch (error) {
+      logger.error('Tool execution failed', {
+        clientId: connection.clientId,
+        tool,
+        error: error.message
+      });
+      throw error;
+    }
+  }
-  /**
-   * Execute a tool directly on a connected DevTools client
-   * This bypasses LLM orchestration and calls the tool directly
-   * @param {Object} connection - DevTools WebSocket connection
-   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
-   * @param {Object} args - Tool-specific arguments
-   * @param {number} timeout - Execution timeout in milliseconds
-   * @returns {Promise<Object>} Tool execution result
-   */
-  async executeToolDirect(connection, tool, args, timeout = 30000) {
-    const rpcId = `tool-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`;
-
-    logger.info('Executing tool directly', {
-      clientId: connection.clientId,
-      tool,
-      timeout
-    });
-
-    try {
-      // Prepare RPC request for execute_tool method
-      const response = await connection.rpcClient.callMethod(
-        connection.ws,
-        'execute_tool',
-        {
-          tool,
-          args,
-          timeout
-        },
-        timeout + 5000 // Add buffer for network overhead
-      );
-
-      logger.info('Tool execution completed', {
-        clientId: connection.clientId,
-        tool,
-        success: response?.result?.success
-      });
-
-      return response;
-
-    } catch (error) {
-      logger.error('Tool execution failed', {
-        clientId: connection.clientId,
-        tool,
-        error: error.message
-      });
-      throw error;
-    }
-  }
+  /**
+   * Execute a tool directly on a connected DevTools client.
+   *
+   * Bypasses LLM orchestration: issues a single `execute_tool` RPC over the
+   * connection's WebSocket and hands the RPC response back to the caller.
+   *
+   * @param {Object} connection - DevTools WebSocket connection (reads `ready`, `clientId`, `rpcClient`, `ws`)
+   * @param {string} tool - Tool name (e.g., 'perform_action', 'navigate_url')
+   * @param {Object} args - Tool-specific arguments
+   * @param {number} timeout - Execution timeout in milliseconds
+   * @returns {Promise<Object>} Tool execution result
+   * @throws {Error} If the connection is not ready, or if the RPC call fails
+   *   (RPC errors are logged and re-thrown)
+   */
+  async executeToolDirect(connection, tool, args, timeout = 30000) {
+    // Fail fast instead of sending an RPC over a connection that is not ready.
+    if (!connection.ready) {
+      throw new Error(`Connection for client ${connection.clientId} is not ready for tool execution`);
+    }
+
+    logger.info('Executing tool directly', {
+      clientId: connection.clientId,
+      tool,
+      timeout
+    });
+
+    try {
+      // The tool timeout is forwarded in the params; the RPC call itself is
+      // given an extra 5000 ms on top of it.
+      const response = await connection.rpcClient.callMethod(
+        connection.ws,
+        'execute_tool',
+        {
+          tool,
+          args,
+          timeout
+        },
+        timeout + 5000 // Add buffer for network overhead
+      );
+
+      logger.info('Tool execution completed', {
+        clientId: connection.clientId,
+        tool,
+        success: response?.result?.success
+      });
+
+      return response;
+
+    } catch (error) {
+      logger.error('Tool execution failed', {
+        clientId: connection.clientId,
+        tool,
+        error: error.message
+      });
+      throw error;
+    }
+  }
+
   /**
    * Execute JavaScript in a browser tab
    * @param {string} tabId - Tab ID (target ID)

diff --git a/docker/Makefile b/docker/Makefile
@@ -0,0 +1,54 @@
+# Makefile for Browser Operator Core
+# Provides DevTools frontend + Agent Server deployments
+#
+# Overridable variables (the defaults reproduce the previously hard-coded values):
+#   COMPOSE        Compose command, e.g. `make up COMPOSE="docker compose"`
+#   CHROME_CANARY  Path to the Chrome binary launched by `make chrome`
+
+COMPOSE ?= docker-compose
+CHROME_CANARY ?= /Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary
+
+.PHONY: help build devtools-up up down logs status chrome
+
+# Lists every target that carries a trailing `## description` comment.
+help: ## Show this help
+	@echo "Browser Operator Core - Docker Deployments"
+	@echo "==========================================="
+	@echo ""
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "  %-12s %s\n", $$1, $$2}'
+	@echo ""
+	@echo "Deployment options:"
+	@echo "  Type 1: make devtools-up  # DevTools only (AUTOMATED_MODE=true by default)"
+	@echo "  Type 2: make up           # DevTools + Agent Server (lightweight)"
+	@echo ""
+	@echo "Full build (includes agent-server in image):"
+	@echo "  make build && docker-compose up -d"
+	@echo ""
+	@echo "For manual debugging mode (Type 1 without API key bypass):"
+	@echo "  docker-compose build --build-arg AUTOMATED_MODE=false && make devtools-up"
+
+build: ## Build full image (DevTools + Agent Server baked in)
+	$(COMPOSE) build
+
+devtools-up: ## Start DevTools only (Type 1)
+	$(COMPOSE) up -d
+	@echo ""
+	@echo "DevTools UI: http://localhost:8000"
+
+up: ## Start DevTools + Agent Server (Type 2 - lightweight)
+	$(COMPOSE) -f docker-compose.lightweight.yml up -d
+	@echo ""
+	@echo "Services:"
+	@echo "  DevTools:   http://localhost:8000"
+	@echo "  Agent API:  http://localhost:8080"
+	@echo "  Agent WS:   ws://localhost:8082"
+
+# Best-effort: both compose projects are torn down and failures are ignored,
+# so `make down` succeeds even when only one of them is running.
+down: ## Stop all containers
+	$(COMPOSE) -f docker-compose.lightweight.yml down 2>/dev/null || true
+	$(COMPOSE) down 2>/dev/null || true
+
+# Tries the lightweight project first and falls back to the default project.
+logs: ## Show logs
+	$(COMPOSE) -f docker-compose.lightweight.yml logs -f 2>/dev/null || $(COMPOSE) logs -f
+
+status: ## Show container status
+	@docker ps --filter "name=browser-operator"
+
+# The default CHROME_CANARY path is macOS-specific; override it on other platforms.
+chrome: ## Launch Chrome Canary with custom DevTools
+	"$(CHROME_CANARY)" \
+		--remote-debugging-port=9222 \
+		--remote-allow-origins="*" \
+		--auto-open-devtools-for-tabs \
+		--user-data-dir=/tmp/chrome-debug-profile \
+		--custom-devtools-frontend=http://localhost:8000/
diff --git a/docker/docker-compose.lightweight.yml b/docker/docker-compose.lightweight.yml
@@ -0,0 +1,46 @@
+# Lightweight deployment: DevTools + Agent Server (no full build required)
+# Uses pre-built DevTools image + separate Agent Server container
+#
+# Usage:
+#   docker-compose -f docker-compose.lightweight.yml up -d
+
+services:
+  # DevTools frontend container; publishes container port 8000 on host port 8000.
+  devtools:
+    image: browser-operator-devtools:latest
+    container_name: browser-operator-devtools
+    ports:
+      - "8000:8000"
+    volumes:
+      # The local nginx.conf is mounted read-only as the container's default nginx site config.
+      - ./nginx.conf:/etc/nginx/conf.d/default.conf:ro
+    restart: unless-stopped
+    networks:
+      - devtools-network
+
+  # Agent Server run straight from the repository sources on a stock Node image
+  # (no image build step); started with `node start.js` from /app.
+  agent-server:
+    image: node:18-alpine
+    container_name: browser-operator-agent
+    working_dir: /app
+    volumes:
+      # Sources are mounted read-only; the named volume is layered on top at /app/clients.
+      # NOTE(review): /app/clients is a mount point inside a read-only bind mount -
+      # confirm the `clients` directory exists in ../agent-server/nodejs.
+      - ../agent-server/nodejs:/app:ro
+      - agent-data:/app/clients
+    command: ["node", "start.js"]
+    ports:
+      - "8080:8080"
+      - "8082:8082"
+    environment:
+      - NODE_ENV=production
+      - HOST=0.0.0.0
+      - PORT=8082
+      - API_PORT=8080
+      - CDP_HOST=host.docker.internal
+      - CDP_PORT=9222
+    extra_hosts:
+      # Docker Engine on Linux does not define host.docker.internal by default;
+      # map it to the host gateway so CDP_HOST resolves there as well.
+      - "host.docker.internal:host-gateway"
+    restart: unless-stopped
+    networks:
+      - devtools-network
+
+volumes:
+  # Named volume mounted at /app/clients in the agent-server container.
+  agent-data:
+
+networks:
+  # Bridge network that both services are attached to.
+  devtools-network:
+    driver: bridge
diff --git a/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts b/front_end/panels/ai_chat/evaluation/EvaluationAgent.ts
@@ -20,17 +20,23 @@ import {
   EvaluationRequest,
   EvaluationSuccessResponse,
   EvaluationErrorResponse,
+  ToolExecutionRequest,
+  ToolExecutionSuccessResponse,
+  ToolExecutionErrorResponse,
   ErrorCodes,
   isWelcomeMessage,
   isRegistrationAckMessage,
   isEvaluationRequest,
+  isToolExecutionRequest,
   isPongMessage,
   createRegisterMessage,
   createReadyMessage,
   createAuthVerifyMessage,
   createStatusMessage,
   createSuccessResponse,
-  createErrorResponse
+  createErrorResponse,
+  createToolExecutionSuccessResponse,
+  createToolExecutionErrorResponse
 } from './EvaluationProtocol.js';
 
 const logger = createLogger('EvaluationAgent');
@@ -171,6 +177,9 @@ export class EvaluationAgent {
       else if (isEvaluationRequest(message)) {
         await this.handleEvaluationRequest(message);
       }
+      else if (isToolExecutionRequest(message)) {
+        await this.handleToolExecutionRequest(message);
+      }
       else if (isPongMessage(message)) {
         logger.debug('Received pong');
       }
@@ -599,6 +608,91 @@ export class EvaluationAgent {
     }
   }
 
+  /**
+   * Handle direct tool execution request (no LLM orchestration)
+   * This allows calling browser automation tools directly via API
+   *
+   * Looks the tool up in the ToolRegistry, runs it once through
+   * executeToolWithTimeout, and answers with either a tool-execution success
+   * response or a tool-execution error response carrying the request id.
+   * Responses are dropped when no client is attached.
+   *
+   * @param request - JSON-RPC `execute_tool` request received from the server
+   */
+  private async handleToolExecutionRequest(request: ToolExecutionRequest): Promise<void> {
+    const { params, id } = request;
+    const startTime = Date.now();
+    const toolName = params?.tool;
+
+    // Single delivery point for every response produced by this handler.
+    const reply = (response: ToolExecutionSuccessResponse | ToolExecutionErrorResponse): void => {
+      if (this.client) {
+        this.client.send(response);
+      }
+    };
+
+    logger.info('Received tool execution request', {
+      tool: toolName,
+      hasArgs: !!params?.args,
+      timeout: params?.timeout
+    });
+
+    // The message guard only checks `jsonrpc` and `method`, so params may be
+    // missing or malformed; answer with an error instead of throwing.
+    if (typeof toolName !== 'string' || toolName.length === 0) {
+      reply(createToolExecutionErrorResponse(
+        id,
+        ErrorCodes.INVALID_TOOL,
+        `Tool not found: ${String(toolName)}`,
+        String(toolName),
+        "Request params must include a non-empty 'tool' name"
+      ));
+      return;
+    }
+
+    try {
+      // Get the tool from registry
+      const tool = ToolRegistry.getRegisteredTool(toolName);
+      if (!tool) {
+        reply(createToolExecutionErrorResponse(
+          id,
+          ErrorCodes.INVALID_TOOL,
+          `Tool not found: ${toolName}`,
+          toolName,
+          `Tool '${toolName}' is not registered in the ToolRegistry`
+        ));
+        return;
+      }
+
+      // Fall back to the 30000 ms default for a missing, zero, negative or
+      // non-finite timeout instead of passing it on to the timer.
+      const requestedTimeout = Number(params.timeout);
+      const timeout = Number.isFinite(requestedTimeout) && requestedTimeout > 0
+        ? requestedTimeout
+        : 30000;
+
+      // Execute the tool directly (no LLM, no navigation, no retries)
+      const result = await this.executeToolWithTimeout(
+        tool,
+        params.args,
+        timeout,
+        undefined, // No tracing context for direct tool calls
+        toolName
+      );
+
+      const executionTime = Date.now() - startTime;
+
+      // Send success response
+      reply(createToolExecutionSuccessResponse(
+        id,
+        toolName,
+        result,
+        executionTime
+      ));
+
+      logger.info('Tool execution completed', {
+        tool: toolName,
+        executionTime,
+        success: true
+      });
+
+    } catch (error) {
+      const executionTime = Date.now() - startTime;
+      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+
+      logger.error(`Tool execution failed: ${errorMessage}`, {
+        tool: toolName,
+        executionTime
+      });
+
+      // Send error response
+      reply(createToolExecutionErrorResponse(
+        id,
+        ErrorCodes.TOOL_EXECUTION_ERROR,
+        'Tool execution failed',
+        toolName,
+        errorMessage
+      ));
+    }
+  }
+
   private async executeToolWithTimeout(
     tool: any,
     input: any,

diff --git a/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts b/front_end/panels/ai_chat/evaluation/EvaluationProtocol.ts
@@ -79,6 +79,44 @@ export interface EvaluationRequest {
   id: string;
 }
 
+// Direct tool execution request (no LLM orchestration)
+// JSON-RPC 2.0 request asking the agent to run one named tool.
+export interface ToolExecutionRequest {
+  jsonrpc: '2.0';
+  method: 'execute_tool';         // Literal that isToolExecutionRequest matches on
+  params: ToolExecutionParams;
+  id: string;                     // Request id; the response builders copy it into the reply
+}
+
+export interface ToolExecutionParams {  // Payload of an `execute_tool` request
+  tool: string;           // Tool name (e.g., 'perform_action', 'navigate_url')
+  args: any;              // Tool-specific arguments
+  timeout?: number;       // Optional timeout in milliseconds (default 30000ms)
+}
+
+export interface ToolExecutionSuccessResponse {  // JSON-RPC 2.0 reply for a tool call that completed
+  jsonrpc: '2.0';
+  result: {
+    success: true;          // Always the literal `true` on this response type
+    output: any;            // Whatever the tool call produced
+    executionTime: number;  // Elapsed time reported by the handler, in milliseconds
+    tool: string;           // Name of the tool that was executed
+  };
+  id: string;               // Id of the request being answered
+}
+
+export interface ToolExecutionErrorResponse {  // JSON-RPC 2.0 reply for a tool call that failed
+  jsonrpc: '2.0';
+  error: {
+    code: number;       // Numeric error code (callers pass values from ErrorCodes)
+    message: string;    // Short summary of the failure
+    data?: {
+      tool: string;     // Name of the tool the request referred to
+      error: string;    // Detailed error text
+    };
+  };
+  id: string;           // Id of the request being answered
+}
+
 export interface EvaluationParams {
   evaluationId: string;
   name: string;
@@ -170,6 +208,10 @@ export function isEvaluationRequest(msg: any): msg is EvaluationRequest {
   return msg?.jsonrpc === '2.0' && msg?.method === 'evaluate';
 }
 
+/**
+ * Type guard for direct tool execution requests.
+ * Matches on the JSON-RPC version and the `execute_tool` method name only;
+ * `params` and `id` are not inspected.
+ */
+export function isToolExecutionRequest(msg: any): msg is ToolExecutionRequest {
+  if (msg?.jsonrpc !== '2.0') {
+    return false;
+  }
+  return msg?.method === 'execute_tool';
+}
+
 export function isPongMessage(msg: any): msg is PongMessage {
   return msg?.type === 'pong';
 }
@@ -254,4 +296,43 @@ export function createErrorResponse(
     },
     id
   };
+}
+
+/**
+ * Build the JSON-RPC 2.0 success reply for a direct tool execution.
+ *
+ * @param id - Id of the request being answered
+ * @param tool - Name of the executed tool
+ * @param output - Value produced by the tool
+ * @param executionTime - Elapsed execution time to report
+ * @returns Response whose `result.success` is always `true`
+ */
+export function createToolExecutionSuccessResponse(
+  id: string,
+  tool: string,
+  output: any,
+  executionTime: number
+): ToolExecutionSuccessResponse {
+  const result: ToolExecutionSuccessResponse['result'] = {
+    success: true,
+    output,
+    executionTime,
+    tool
+  };
+  return { jsonrpc: '2.0', result, id };
+}
+
+/**
+ * Build the JSON-RPC 2.0 error reply for a direct tool execution.
+ *
+ * @param id - Id of the request being answered
+ * @param code - Numeric error code
+ * @param message - Short summary of the failure
+ * @param tool - Name of the tool the request referred to
+ * @param error - Detailed error text, placed in `error.data.error`
+ * @returns Response with `error.data` always populated
+ */
+export function createToolExecutionErrorResponse(
+  id: string,
+  code: number,
+  message: string,
+  tool: string,
+  error: string
+): ToolExecutionErrorResponse {
+  const data = { tool, error };
+  const response: ToolExecutionErrorResponse = {
+    jsonrpc: '2.0',
+    error: { code, message, data },
+    id
+  };
+  return response;
 }