morph-labs · ashish-sarvam · May 15, 2025 · May 15, 2025 · May 15, 2025 · May 15, 2025
diff --git a/.env.example b/.env.example
@@ -6,4 +6,9 @@ OPENAI_ORG = "org-123"
 BROWSERBASE_API_KEY="00000000-0000-0000-0000-000000000000"
 BROWSERBASE_PROJECT_ID="bb_live_00000000-00000"
 
-SCRAPYBARA_API_KEY="scrapy-123"
+SCRAPYBARA_API_KEY="scrapy-123"
+
+MORPH_API_KEY=""
+
+AZURE_OPENAI_API_KEY=""
+AZURE_OPENAI_ENDPOINT=""
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,5 @@
 __pycache__/
 .env
-.venv/
+.venv/
+trajectory/
+venv/
diff --git a/README.md b/README.md
@@ -28,17 +28,19 @@ This fork enhances the OpenAI Computer Using Agent sample app with Morph Cloud i
 To use Morph Cloud with the CUA sample app:
 
 ```bash
-# Set up Python environment
-python3 -m venv env
-source env/bin/activate
+# Set up Python environment with conda
+conda create -n manas python=3.11
+conda activate manas
 pip install -r requirements.txt
 
 # Install Morph Cloud and set API key
+
+
 pip install morphcloud
 export MORPH_API_KEY=your_api_key_here 
 
 # Run with Morph Cloud
-python cli.py --computer morph
+python cli.py --input "Open the Tokyo Wikipedia page" --storage-folder ./trajectory --computer morph
 ```
 
 ### Exploring Infinibranch Capabilities

diff --git a/agent/autonomous_agent.py b/agent/autonomous_agent.py
diff --git a/agent/branching_agent.py b/agent/branching_agent.py
@@ -210,6 +210,9 @@ def create_agents(self, branches=None, instructions=None, context=None, agent_kw
 
             # Create agent kwargs for this specific branch
             branch_agent_kwargs = agent_kwargs.copy()
+
+            # No need to modify storage_folder - let AutonomousAgent constructor handle this
+
             branch_agent_kwargs.update({
                 "tools": branch_tools,
                 "initial_task": full_instruction,
@@ -447,5 +450,3 @@ def cleanup_main_computer(self):
                 console.print("[green]Cleaned up main computer[/]")
             except Exception as e:
                 console.print(f"[bold red]Error cleaning up main computer: {e}[/]")
-
-
diff --git a/cli.py b/cli.py
@@ -1,5 +1,9 @@
 import argparse
+import os
+import time
 from agent.agent import Agent
+from agent.autonomous_agent import AutonomousAgent
+from agent.branching_agent import BranchingAgent
 from computers import (
     BrowserbaseBrowser,
     ScrapybaraBrowser,
@@ -10,13 +14,66 @@
 )
 
 
-def acknowledge_safety_check_callback(message: str) -> bool:
-    response = input(
-        f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
-    ).lower()
-    return response.lower().strip() == "y"
+SYSTEM_PROMPT = """
+You are Manas, an AI agent created by the Sarvam team.
 
+Your mission is to complete a wide range of computer-based tasks using autonomous reasoning, programming, and internet access. You operate inside a Linux sandbox environment and work in iterative collaboration with users to plan, execute, and deliver reliable, high-quality results.
 
+You excel at:
+	•	Gathering, verifying, and documenting information from trustworthy sources
+	•	Processing, analyzing, and visualizing complex data
+	•	Writing structured articles, multi-chapter essays, and long-form research reports
+	•	Developing websites, applications, and technical tools
+	•	Solving diverse technical and operational problems through programming
+	•	Completing any task that can be achieved using computers and the internet
+
+Language policy:
+	•	Default working language is English
+	•	If a user specifies a different language, switch to that for all communication, reasoning, and tool interaction
+	•	All natural language arguments in tool calls must follow the current working language
+
+System capabilities:
+	•	Interact with users via message-based communication
+	•	Access a Linux sandbox environment with internet connectivity
+	•	Use the shell, browser, and VS Code as main interfaces
+	•	Write, edit, and execute code using Visual Studio Code
+	•	Navigate file systems and repositories using VS Code
+	•	Install and manage dependencies using the shell
+	•	Deploy websites or applications and provide publicly accessible URLs
+	•	Request the user to intervene in the browser for secure or sensitive interactions
+	•	Perform all search tasks using DuckDuckGo
+	•	Leverage other AI agents such as ChatGPT as assistants for complex planning
+
+Strategic behavior:
+	•	Before attempting any challenging or ambiguous task, prioritize building a clear, actionable plan
+	•	Use DuckDuckGo for gathering context and research
+	•	Use ChatGPT (or similar AI agents) to support planning, idea generation, or clarification
+	•	Once a strategy is formed, proceed with step-by-step execution
+	•	Break down large goals into smaller, testable stages
+	•	Continuously revise your plan based on feedback and results
+
+Tool preferences:
+	•	Use DuckDuckGo for privacy-first web search
+	•	Use Visual Studio Code for:
+	•	Software development
+	•	Writing and running code
+	•	Navigating and editing folders and repositories
+
+Agent loop:
+	1.	Analyze Events: Interpret the user’s intent and current system state by monitoring the event stream
+	2.	Select Tools: Choose the next best action or tool call based on task requirements, available tools, and current observations
+	3.	Wait for Execution: Await the result of the selected action before proceeding
+	4.	Iterate: Take one action per cycle; repeat until the task is complete or new input is received
+	5.	Submit Results: Deliver outputs, completed files, or live links to the user
+	6.	Enter Standby: Wait for the next instruction when idle or paused
+
+Behavioral constraints:
+	•	Avoid pure bullet points unless explicitly requested
+	•	Communicate in natural, structured, and thoughtful language
+	•	Think clearly, act precisely, and move step by step
+
+You are Manas, an advanced operational agent built by the Sarvam team. You combine the clarity of a strategist, the discipline of a developer, and the reliability of a systems engineer. You think before you act—and always plan before tackling the complex.
+"""
 def main():
     parser = argparse.ArgumentParser(
         description="Select a computer environment from the available options."
@@ -56,8 +113,34 @@ def main():
         help="Start the browsing session with a specific URL (only for browser environments).",
         default="https://bing.com",
     )
+    parser.add_argument(
+        "--num_branches",
+        type=int,
+        help="Number of branches to create.",
+        default=3,
+    )
+    parser.add_argument(
+        "--storage-folder",
+        type=str,
+        help="Folder path for storing agent data. Will be created if it doesn't exist.",
+        default="./agent_storage",
+    )
     args = parser.parse_args()
 
+    # Ensure storage folder exists
+    if args.storage_folder:
+        os.makedirs(args.storage_folder, exist_ok=True)
+        print(f"Storage folder initialized at: {args.storage_folder}")
+
+        # Create a timestamped subfolder for this run
+        timestamp = time.strftime("%Y%m%d_%H%M%S")
+        timestamped_folder = os.path.join(args.storage_folder, f"trajectory_{timestamp}")
+        os.makedirs(timestamped_folder, exist_ok=True)
+        print(f"Created timestamped subfolder: {timestamped_folder}")
+
+        # Update storage_folder to use the timestamped subfolder
+        args.storage_folder = timestamped_folder
+
     computer_mapping = {
         "local-playwright": LocalPlaywrightComputer,
         "docker": DockerComputer,
@@ -70,23 +153,28 @@ def main():
     ComputerClass = computer_mapping[args.computer]
 
     with ComputerClass() as computer:
-        agent = Agent(
-            computer=computer,
-            acknowledge_safety_check_callback=acknowledge_safety_check_callback,
+        system_prompt = SYSTEM_PROMPT
+        agent_kwargs = {
+            "initial_task": args.input, 
+            "system_prompt": system_prompt, 
+            "max_steps": 1000,
+            "storage_folder": args.storage_folder
+        }
+        agent = BranchingAgent(computer=computer, agent_kwargs=agent_kwargs)
+        agent.shared_context = args.input
+        agent.branch_instructions = []
+        for i in range(args.num_branches):
+            branch_instruction = f"branch {i+1}: try a different approach for solving the user task from this"
+            agent.branch_instructions.append(branch_instruction)
+        # Run branches with the computer-first approach
+        print("[bold blue]Running branches...[/]")
+        results = agent.run_branches(
+            instructions=agent.branch_instructions,
+            context=agent.shared_context
         )
-        items = []
-
-        while True:
-            user_input = args.input or input("> ")
-            items.append({"role": "user", "content": user_input})
-            output_items = agent.run_full_turn(
-                items,
-                print_steps=True,
-                show_images=args.show,
-                debug=args.debug,
-            )
-            items += output_items
-            args.input = None
+
+        # Display results
+        agent.display_results()
 
 
 if __name__ == "__main__":