Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const BUILTIN_TOOLS = [
"execute_python_code",
"browser_use",
"desktop_screenshot",
"view_image",
"read_file",
"write_file",
"edit_file",
Expand Down
1 change: 1 addition & 0 deletions console/src/pages/Settings/Security/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const BUILTIN_TOOLS = [
"execute_python_code",
"browser_use",
"desktop_screenshot",
"view_image",
"read_file",
"write_file",
"edit_file",
Expand Down
5 changes: 4 additions & 1 deletion src/copaw/agents/model_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,10 @@ def _create_formatter_instance(
formatter_class = _create_file_block_support_formatter(
base_formatter_class,
)
return formatter_class()
kwargs: dict[str, Any] = {}
if issubclass(base_formatter_class, OpenAIChatFormatter):
kwargs["promote_tool_result_images"] = True
return formatter_class(**kwargs)


__all__ = [
Expand Down
2 changes: 2 additions & 0 deletions src/copaw/agents/react_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
read_file,
send_file_to_user,
set_user_timezone,
view_image,
write_file,
create_memory_search_tool,
)
Expand Down Expand Up @@ -187,6 +188,7 @@ def _create_toolkit(
"edit_file": edit_file,
"browser_use": browser_use,
"desktop_screenshot": desktop_screenshot,
"view_image": view_image,
"send_file_to_user": send_file_to_user,
"get_current_time": get_current_time,
"set_user_timezone": set_user_timezone,
Expand Down
2 changes: 2 additions & 0 deletions src/copaw/agents/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .send_file import send_file_to_user
from .browser_control import browser_use
from .desktop_screenshot import desktop_screenshot
from .view_image import view_image
from .memory_search import create_memory_search_tool
from .get_current_time import get_current_time, set_user_timezone
from .get_token_usage import get_token_usage
Expand All @@ -36,6 +37,7 @@
"glob_search",
"send_file_to_user",
"desktop_screenshot",
"view_image",
"browser_use",
"create_memory_search_tool",
"get_current_time",
Expand Down
81 changes: 81 additions & 0 deletions src/copaw/agents/tools/view_image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""Load an image file into the LLM context for visual analysis."""

import mimetypes
import os
import unicodedata
from pathlib import Path

from agentscope.message import ImageBlock, TextBlock
from agentscope.tool import ToolResponse

# File suffixes (lower-case, with the leading dot) that view_image accepts
# as images without consulting the mimetypes database.
_IMAGE_EXTENSIONS = {
    ".bmp",
    ".gif",
    ".jpeg",
    ".jpg",
    ".png",
    ".tif",
    ".tiff",
    ".webp",
}


def _view_image_error(message: str) -> ToolResponse:
    """Wrap *message* in a text-only ``ToolResponse`` reporting a failure."""
    return ToolResponse(content=[TextBlock(type="text", text=message)])


async def view_image(image_path: str) -> ToolResponse:
    """Load an image file into the LLM context so the model can see it.

    Use this after desktop_screenshot, browser_use, or any tool that
    produces an image file path.

    The path is user-expanded (``~``), NFC-normalized and resolved before
    it is checked. A file is accepted when its suffix is one of
    ``_IMAGE_EXTENSIONS`` or when ``mimetypes`` guesses an ``image/*``
    type for it. Failures are reported as a text block rather than
    raised.

    Args:
        image_path (`str`):
            Path to the image file to view.

    Returns:
        `ToolResponse`:
            An ImageBlock the model can inspect, or an error message.
    """
    image_path = unicodedata.normalize(
        "NFC",
        os.path.expanduser(image_path),
    )

    try:
        resolved = Path(image_path).resolve()
        # is_file() is already False for a path that does not exist, so
        # no separate exists() check is needed.
        is_regular_file = resolved.is_file()
    except (OSError, RuntimeError, ValueError) as exc:
        # Symlink loops, embedded NUL bytes or permission errors would
        # otherwise escape the tool as an unhandled exception.
        return _view_image_error(
            f"Error: cannot access {image_path}: {exc}",
        )

    if not is_regular_file:
        return _view_image_error(
            f"Error: {image_path} does not exist or is not a file.",
        )

    if resolved.suffix.lower() not in _IMAGE_EXTENSIONS:
        # Unknown suffix: fall back to the mimetypes database.
        mime, _ = mimetypes.guess_type(str(resolved))
        if not mime or not mime.startswith("image/"):
            return _view_image_error(
                f"Error: {resolved.name} is not a supported image format.",
            )

    return ToolResponse(
        content=[
            ImageBlock(
                type="image",
                source={"type": "url", "url": str(resolved)},
            ),
            TextBlock(
                type="text",
                text=f"Image loaded: {resolved.name}",
            ),
        ],
    )
8 changes: 8 additions & 0 deletions src/copaw/app/channels/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

from .renderer import MessageRenderer, RenderStyle
from .schema import ChannelType
from ...config.utils import load_config

# Optional callback to enqueue payload (set by manager)
EnqueueCallback = Optional[Callable[[Any], None]]
Expand Down Expand Up @@ -99,10 +100,17 @@ def __init__(
self.deny_message = deny_message or ""
self.require_mention = require_mention
self._enqueue: EnqueueCallback = None
cfg = load_config()
internal_tools = frozenset(
name
for name, tc in cfg.tools.builtin_tools.items()
if not tc.display_to_user
)
self._render_style = RenderStyle(
show_tool_details=show_tool_details,
filter_tool_messages=filter_tool_messages,
filter_thinking=filter_thinking,
internal_tools=internal_tools,
)
self._renderer = MessageRenderer(self._render_style)
self._http: Optional[Any] = None
Expand Down
20 changes: 15 additions & 5 deletions src/copaw/app/channels/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class RenderStyle:
use_emoji: bool = True
filter_tool_messages: bool = False
filter_thinking: bool = False
internal_tools: frozenset = frozenset()


def _fmt_tool_call(
Expand Down Expand Up @@ -192,11 +193,17 @@ def _parts_for_tool_output(content_list: list) -> List[_OutgoingPart]:
ContentType.VIDEO,
ContentType.FILE,
)
media_parts = [
p
for p in block_parts
if getattr(p, "type", None) in media_types
]
# Internal tools (e.g. view_image) produce
# media for the LLM, not the user — skip.
media_parts = (
[]
if name in s.internal_tools
else [
p
for p in block_parts
if getattr(p, "type", None) in media_types
]
)
out.extend(media_parts)
if not media_parts:
out.append(
Expand Down Expand Up @@ -265,6 +272,9 @@ def _parts_for_tool_output(content_list: list) -> List[_OutgoingPart]:
if getattr(c, "type", None) != ContentType.DATA:
continue
data = getattr(c, "data", None) or {}
name = data.get("name") or "tool"
if name in s.internal_tools:
continue
output = data.get("output", "")
try:
output = json.loads(output)
Expand Down
128 changes: 76 additions & 52 deletions src/copaw/config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,66 +409,90 @@ class BuiltinToolConfig(BaseModel):
name: str = Field(..., description="Tool function name")
enabled: bool = Field(True, description="Whether the tool is enabled")
description: str = Field(default="", description="Tool description")
display_to_user: bool = Field(
True,
description="Whether tool output is rendered to user channels",
)


def _default_builtin_tools() -> Dict[str, BuiltinToolConfig]:
    """Build a new mapping of every code-defined built-in tool.

    Each call constructs fresh ``BuiltinToolConfig`` objects, keyed by
    tool name, in the declaration order of the table below. All tools
    are enabled; ``view_image`` is the only entry that sets
    ``display_to_user=False``.
    """
    # (tool name, description, extra keyword arguments)
    specs = (
        ("execute_shell_command", "Execute shell commands", {}),
        ("read_file", "Read file contents", {}),
        ("write_file", "Write content to file", {}),
        ("edit_file", "Edit file using find-and-replace", {}),
        ("browser_use", "Browser automation and web interaction", {}),
        ("desktop_screenshot", "Capture desktop screenshots", {}),
        (
            "view_image",
            "Load an image into LLM context for visual analysis",
            {"display_to_user": False},
        ),
        ("send_file_to_user", "Send files to user", {}),
        ("get_current_time", "Get current date and time", {}),
        ("set_user_timezone", "Set user timezone", {}),
        ("get_token_usage", "Get llm token usage", {}),
    )

    tools: Dict[str, BuiltinToolConfig] = {}
    for tool_name, summary, extra in specs:
        tools[tool_name] = BuiltinToolConfig(
            name=tool_name,
            enabled=True,
            description=summary,
            **extra,
        )
    return tools


class ToolsConfig(BaseModel):
    """Built-in tools management configuration."""

    # Defaults come only from _default_builtin_tools(). Passing a second,
    # inline default_factory to the same Field() call is a repeated
    # keyword argument (SyntaxError), and that inline copy lacked
    # view_image.
    builtin_tools: Dict[str, BuiltinToolConfig] = Field(
        default_factory=_default_builtin_tools,
    )

    @model_validator(mode="after")
    def _merge_default_tools(self):
        """Ensure new code-defined tools are present in saved configs.

        Entries already present in ``builtin_tools`` are left untouched;
        only names missing from it are filled in from
        ``_default_builtin_tools()``.

        Returns:
            The validated model instance (``self``).
        """
        for name, tc in _default_builtin_tools().items():
            # setdefault inserts only when the key is absent.
            self.builtin_tools.setdefault(name, tc)
        return self


class ToolGuardRuleConfig(BaseModel):
"""A single user-defined guard rule (stored in config.json)."""
Expand Down
Loading