Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .env_template
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ BROWSER_PROXY_USER=
BROWSER_PROXY_PASSWORD=

# 如果模型不支持response_format,最好将下面的选项打开
ADD_SCHEMA_TO_SYSTEM_PROMPT=
ADD_SCHEMA_TO_SYSTEM_PROMPT=True
# 如果开启ADD_SCHEMA_TO_SYSTEM_PROMPT建议同步开启
INCLUDE_TOOL_CALL_EXAMPLES=True
# 在Browser Use新版中模型对模型上下文要求很高(尤其是输出部分),如果模型上下文较小,建议打开下面的选项
FLASH_MODE=True
# 在模型上下文较小的情况下,建议设置为1 (<16K)
MAX_TOKENS_PER_STEP=1
MAX_ACTIONS_PER_STEP=1

# USER_AGENT=

Expand Down
104 changes: 87 additions & 17 deletions app/cosight/tool/web_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def _env_float(name: str, default: float) -> float:
return default


def _env_int(name: str, default: Optional[int] = None) -> Optional[int]:
def _env_int(name: str, default: int = 1) -> int:
value = os.environ.get(name)
if value is None or not value.strip():
return default
Expand Down Expand Up @@ -130,7 +130,9 @@ async def create_browser_session():
server=proxy_url,
username=proxy_user,
password=proxy_password,
bypass=os.environ.get("BROWSER_PROXY_BYPASS", "localhost,127.0.0.1,*.internal"),
bypass=os.environ.get(
"BROWSER_PROXY_BYPASS", "localhost,127.0.0.1,*.internal"
),
)

profile = BrowserProfile(
Expand Down Expand Up @@ -219,6 +221,77 @@ async def _reset_browser_session(cls) -> None:
finally:
cls._shared_browser_session = None

@classmethod
def has_active_browser_session(cls) -> bool:
    """Report whether a shared browser session is currently held.

    Lets callers outside this class (for example other agents) find out
    whether the class-level shared session slot is populated.

    Returns:
        bool: True when ``cls._shared_browser_session`` is set, False when
        it is ``None``.
    """
    current_session = cls._shared_browser_session
    return current_session is not None

@classmethod
def check_browser_session(cls, task_requires_browser: bool) -> str:
    """Report the shared browser session state, closing it when unneeded.

    Intended for an external actor agent. Three outcomes are possible:

    * no session exists -> nothing is done;
    * a session exists and the task needs it -> it is left untouched;
    * a session exists and the task does not need it -> the session is
      reset via ``_reset_browser_session`` on the browser loop.

    Args:
        task_requires_browser (bool): Whether the current task needs browser
            interaction. A falsy value triggers the automatic close when a
            session is present.

    Returns:
        str: A human-readable status message describing the session state
        and any action taken. A failure to close is reported in the message
        rather than raised.
    """
    # Guard: nothing to inspect or close.
    if not cls.has_active_browser_session():
        logger.info("No active browser session exists")
        return "No active browser session exists"

    # Guard: the session is wanted, so keep it alive.
    if task_requires_browser:
        logger.info("Browser session exists and is available for current task")
        return "Browser session exists and is ready for use in current task"

    logger.info(
        "Browser session exists but current task does not require it - auto-closing"
    )
    try:
        _run_in_browser_loop(cls._reset_browser_session())
        logger.info("Browser session auto-closed successfully")
        return "Browser session existed but was not needed for current task - closed successfully"
    except Exception as exc:
        # Best effort: surface the failure as text instead of propagating it.
        reason = str(exc)
        logger.error(f"Failed to auto-close browser session: {reason}", exc_info=True)
        return f"Failed to auto-close browser session: {reason}"

@classmethod
def close_browser(cls) -> str:
    """Close the shared browser session and report what happened.

    Lets external agents explicitly release the browser once it is no longer
    needed, e.g. when the planning phase decides the current session is not
    required any more.

    The reset coroutine is always executed, exactly as before, so whatever
    cleanup it performs is preserved. The only difference is the message
    returned when no session was open: previously this case was reported as
    a successful close, contradicting the documented contract.

    Returns:
        str: One of
            * "Browser session closed successfully" - a session existed and
              the reset completed;
            * "No active browser session exists" - the reset completed but
              there was no session to close;
            * "Failed to close browser session: <error>" - the reset raised.
        Errors are reported in the message and never propagated.
    """
    logger.info("External request to close shared browser session")

    # Snapshot the state first: the reset clears the shared session slot, so
    # afterwards it is impossible to tell whether anything was actually open.
    had_session = cls._shared_browser_session is not None

    try:
        _run_in_browser_loop(cls._reset_browser_session())
    except Exception as e:
        logger.error(f"Failed to close browser session: {str(e)}", exc_info=True)
        return f"Failed to close browser session: {str(e)}"

    if not had_session:
        # Same wording as check_browser_session uses for this situation.
        logger.info("No active browser session exists")
        return "No active browser session exists"

    logger.info("Shared browser session closed successfully by external request")
    return "Browser session closed successfully"

def browser_use(self, task_prompt: str):
r"""A powerful toolkit which can simulate the browser interaction to solve the task which needs multi-step actions.

Expand Down Expand Up @@ -247,35 +320,32 @@ async def inner_browser_use(self, task_prompt):
try:
browser_session = await self._get_shared_browser_session()
if self._llm is None:
llm_kwargs = {**self.llm_config}
llm_kwargs.setdefault("temperature", 0.0)
llm_kwargs["add_schema_to_system_prompt"] = _env_bool(
"ADD_SCHEMA_TO_SYSTEM_PROMPT",
llm_kwargs.get("add_schema_to_system_prompt", True),
self._llm = ChatOpenAI(
**self.llm_config,
max_completion_tokens=8192,
temperature=0.0,
add_schema_to_system_prompt=_env_bool(
"ADD_SCHEMA_TO_SYSTEM_PROMPT", True
),
)
self._llm = ChatOpenAI(**llm_kwargs)
# 创建agent,复用共享的browser session
agent_kwargs: dict[str, Any] = dict(

agent = Agent(
task=task_prompt,
browser_session=browser_session, # 使用共享的browser session
llm=self._llm,
use_vision=False,
max_actions_per_step=1,
max_actions_per_step=_env_int("MAX_ACTIONS_PER_STEP", 1),
directly_open_url=False,
flash_mode=_env_bool("FLASH_MODE", True),
include_tool_call_examples=_env_bool("INCLUDE_TOOL_CALL_EXAMPLES", True),
extend_system_message="""
ADDITIONAL INSTRUCTIONS:
YOU **MUST** FOLLOW THESE INSTRUCTIONS:
- Your answers **MUST NOT** contain any of the markdown code blocks such as ``` or ```json.
- **Directly** return the final answer as a plain text **without any additional formatting**.
""",
)

max_tokens_per_step = _env_int("MAX_TOKENS_PER_STEP")
if max_tokens_per_step is not None:
agent_kwargs["max_tokens_per_step"] = max_tokens_per_step

agent = Agent(**agent_kwargs)

# 运行agent
result = await agent.run()
final_result = result.final_result()
Expand Down