diff --git a/backend/app/agent/factory/browser.py b/backend/app/agent/factory/browser.py index 161c809b4..39fa08519 100644 --- a/backend/app/agent/factory/browser.py +++ b/backend/app/agent/factory/browser.py @@ -226,7 +226,6 @@ def browser_agent(options: Chat): options.project_id, Agents.browser_agent, working_directory=working_directory, - safe_mode=True, clone_current_env=True, ) terminal_toolkit = message_integration.register_functions( diff --git a/backend/app/agent/factory/developer.py b/backend/app/agent/factory/developer.py index b8ff1bc5b..0ba9ae725 100644 --- a/backend/app/agent/factory/developer.py +++ b/backend/app/agent/factory/developer.py @@ -70,7 +70,6 @@ async def developer_agent(options: Chat): options.project_id, Agents.developer_agent, working_directory=working_directory, - safe_mode=True, clone_current_env=True, ) terminal_toolkit = message_integration.register_toolkits(terminal_toolkit) diff --git a/backend/app/agent/factory/document.py b/backend/app/agent/factory/document.py index ac7d9528c..3b0e68329 100644 --- a/backend/app/agent/factory/document.py +++ b/backend/app/agent/factory/document.py @@ -85,7 +85,6 @@ async def document_agent(options: Chat): options.project_id, Agents.document_agent, working_directory=working_directory, - safe_mode=True, clone_current_env=True, ) terminal_toolkit = message_integration.register_toolkits(terminal_toolkit) diff --git a/backend/app/agent/factory/multi_modal.py b/backend/app/agent/factory/multi_modal.py index d8c21fade..8f46c8d0e 100644 --- a/backend/app/agent/factory/multi_modal.py +++ b/backend/app/agent/factory/multi_modal.py @@ -65,7 +65,6 @@ def multi_modal_agent(options: Chat): options.project_id, agent_name=Agents.multi_modal_agent, working_directory=working_directory, - safe_mode=True, clone_current_env=True, ) terminal_toolkit = message_integration.register_toolkits(terminal_toolkit) diff --git a/backend/app/agent/toolkit/terminal_toolkit.py b/backend/app/agent/toolkit/terminal_toolkit.py index e22567d21..dfc1a1afe 100644 --- a/backend/app/agent/toolkit/terminal_toolkit.py +++ b/backend/app/agent/toolkit/terminal_toolkit.py @@ -19,6 +19,8 @@ import shutil import subprocess import threading +import time +import uuid from concurrent.futures import ThreadPoolExecutor from camel.toolkits.terminal_toolkit import ( @@ -28,17 +30,25 @@ from app.agent.toolkit.abstract_toolkit import AbstractToolkit from app.component.environment import env +from app.hitl.terminal_command import ( + is_dangerous_command, + validate_cd_within_working_dir, +) +from app.model.enums import ApprovalAction from app.service.task import ( Action, + ActionCommandApprovalData, ActionTerminalData, Agents, get_task_lock, + get_task_lock_if_exists, process_task, ) from app.utils.listen.toolkit_listen import auto_listen_toolkit logger = logging.getLogger("terminal_toolkit") + # App version - should match electron app version # TODO: Consider getting this from a shared config APP_VERSION = "0.0.85" @@ -58,7 +68,7 @@ def get_terminal_base_venv_path() -> str: class TerminalToolkit(BaseTerminalToolkit, AbstractToolkit): agent_name: str = Agents.developer_agent _thread_pool: ThreadPoolExecutor | None = None - _thread_local = threading.local() + _thread_local: threading.local = threading.local() def __init__( self, @@ -69,7 +79,6 @@ def __init__( use_docker_backend: bool = False, docker_container_name: str | None = None, session_logs_dir: str | None = None, - safe_mode: bool = True, allowed_commands: list[str] | None = None, clone_current_env: bool = True, ): @@ -100,22 +109,30 @@ def __init__( max_workers=1, thread_name_prefix="terminal_toolkit" ) + self._use_docker_backend = use_docker_backend + self._working_directory = working_directory + + # safe_mode is read fresh from task_lock in shell_exec (see + # _get_terminal_approval), but we need an initial value for + # super().__init__. + task_lock = get_task_lock_if_exists(api_task_id) + terminal_approval = ( + task_lock.hitl_options.terminal_approval if task_lock else False + ) + camel_safe_mode = not terminal_approval super().__init__( timeout=timeout, working_directory=working_directory, use_docker_backend=use_docker_backend, docker_container_name=docker_container_name, session_logs_dir=session_logs_dir, - safe_mode=safe_mode, + safe_mode=camel_safe_mode, allowed_commands=allowed_commands, clone_current_env=True, install_dependencies=[], ) # Auto-register with TaskLock for cleanup when task ends - from app.service.task import get_task_lock_if_exists - - task_lock = get_task_lock_if_exists(api_task_id) if task_lock: task_lock.register_toolkit(self) logger.info( @@ -349,7 +366,96 @@ def _run_coro_in_thread(coro, task_lock): exc_info=True, ) - def shell_exec( + def _get_terminal_approval(self) -> bool: + """Read terminal_approval from task_lock on every call. + + This ensures the setting takes effect immediately when the user + toggles it between tasks (the task_lock is updated at POST /chat). + Also syncs Camel's safe_mode so it stays consistent. + """ + task_lock = get_task_lock_if_exists(self.api_task_id) + enabled = ( + task_lock.hitl_options.terminal_approval if task_lock else False + ) + self.safe_mode = not enabled + return enabled + + async def _request_user_approval(self, action_data) -> str | None: + """Send a command approval request to the frontend and wait. + + Flow:: + + _request_user_approval (agent coroutine) + 1. create_approval(approval_id) → Future stored, agent will await it + 2. put_queue(action_data) → drops SSE event into shared queue + 3. await future → agent suspends + + SSE generator in chat_service.py (independently) + 4. get_queue() → picks up the event + 5. yield sse_json(...) → sends command_approval to frontend + + Frontend + 6. shows approval card + + User clicks "Approve Once" / "Auto Approve" / "Reject" + 7. POST /approval → resolve_approval() or resolve_all_approvals_for_agent() + 8. future.set_result(...) → agent resumes at step 3 + + Args: + action_data (ActionCommandApprovalData): SSE payload containing + the command and agent name. ``approval_id`` is injected into + ``action_data.data`` before the event is queued. + + Returns: + None if the command is approved (approve_once or auto_approve). + An error string if the command is rejected. + """ + task_lock = get_task_lock(self.api_task_id) + if task_lock.auto_approve.get(self.agent_name, False): + return None + + # Each concurrent call gets its own Future keyed by approval_id. + # Use str concatenation (not f-string) so that Enum values like + # Agents.developer_agent produce "developer_agent_..." instead of + # "Agents.developer_agent_..." — the latter breaks startswith() + # matching in resolve_all_approvals_for_agent. + approval_id = self.agent_name + "_" + uuid.uuid4().hex[:12] + action_data.data["approval_id"] = approval_id + future = task_lock.create_approval(approval_id) + + logger.info( + "[APPROVAL] Pushing approval event to SSE queue, " + "api_task_id=%s, agent=%s, approval_id=%s", + self.api_task_id, + self.agent_name, + approval_id, + ) + + await task_lock.put_queue(action_data) + + logger.info("[APPROVAL] Event pushed, waiting for user response") + + approval = await future + + logger.info("[APPROVAL] Received response: %s", approval) + + # Re-check: another concurrent call may have set auto_approve + # while this call was waiting on its Future. + if task_lock.auto_approve.get(self.agent_name, False): + return None + + if approval == ApprovalAction.approve_once: + return None + if approval == ApprovalAction.auto_approve: + task_lock.auto_approve[self.agent_name] = True + # Unblock all other pending approvals for this agent + task_lock.resolve_all_approvals_for_agent( + self.agent_name, ApprovalAction.auto_approve + ) + return None + return "Operation rejected by user. The task is being stopped." + + async def shell_exec( self, command: str, id: str | None = None, @@ -358,6 +464,25 @@ def shell_exec( ) -> str: r"""Executes a shell command in blocking or non-blocking mode. + When HITL terminal approval is on, commands that require user + confirmation trigger an approval request before execution. + + .. note:: Async override of a sync base method + + Camel's ``BaseTerminalToolkit.shell_exec`` is **sync-only** (no + async variant exists in the upstream library). We override it + as ``async def`` so the HITL approval flow can ``await`` the + SSE queue and the approval-response queue — following the same + asyncio.Queue pattern used by ``HumanToolkit.ask_human_via_gui``. + + Because the base method is sync and this override is async, the + ``listen_toolkit`` decorator applies a ``__wrapped__`` fix (see + ``toolkit_listen.py``) to ensure Camel's ``FunctionTool`` + dispatches this method via ``async_call`` on the **main** event + loop, rather than via the sync ``__call__`` path which would run + it on a background loop where cross-loop ``asyncio.Queue`` awaits + silently fail. + Args: command (str): The shell command to execute. id (str, optional): A unique identifier for the command's session. @@ -368,12 +493,28 @@ def shell_exec( Returns: str: The output of the command execution. """ - # Auto-generate ID if not provided if id is None: - import time - id = f"auto_{int(time.time() * 1000)}" + if not self._use_docker_backend: + ok, err = validate_cd_within_working_dir( + command, self._working_directory + ) + if not ok: + return err or "cd not allowed." + + terminal_approval = self._get_terminal_approval() + is_dangerous = ( + is_dangerous_command(command) if terminal_approval else False + ) + if terminal_approval and is_dangerous: + approval_data = ActionCommandApprovalData( + data={"command": command, "agent": self.agent_name} + ) + rejection = await self._request_user_approval(approval_data) + if rejection is not None: + return rejection + result = super().shell_exec( id=id, command=command, block=block, timeout=timeout ) diff --git a/backend/app/controller/chat_controller.py b/backend/app/controller/chat_controller.py index 87e42a70a..d467e4405 100644 --- a/backend/app/controller/chat_controller.py +++ b/backend/app/controller/chat_controller.py @@ -26,6 +26,7 @@ from app.component import code from app.component.environment import sanitize_env_path, set_user_env_path from app.exception.exception import UserException +from app.hitl.config import ApprovalRequest from app.model.chat import ( AddTaskRequest, Chat, @@ -35,6 +36,7 @@ SupplementChat, sse_json, ) +from app.model.enums import ApprovalAction from app.service.chat_service import step_solve from app.service.task import ( Action, @@ -176,6 +178,7 @@ async def post(data: Chat, request: Request): ) task_lock = get_or_create_task_lock(data.project_id) + task_lock.hitl_options = data.hitl_options # Set user-specific environment path for this thread set_user_env_path(data.env_path) @@ -258,6 +261,10 @@ def improve(id: str, data: SupplementChat): ) task_lock = get_task_lock(id) + # Update HITL options if provided (user may have changed settings) + if data.hitl_options is not None: + task_lock.hitl_options = data.hitl_options + # Allow continuing conversation even after task is done # This supports multi-turn conversation after complex task completion if task_lock.status == Status.done: @@ -416,6 +423,39 @@ def human_reply(id: str, data: HumanReply): return Response(status_code=201) +@router.post("/chat/{id}/approval") +def approval(id: str, data: ApprovalRequest): + """Handle user approval response for a command requiring confirmation.""" + chat_logger.info( + "Approval received", + extra={ + "task_id": id, + "agent": data.agent, + "approval": data.approval, + "approval_id": data.approval_id, + }, + ) + task_lock = get_task_lock(id) + + if data.approval == ApprovalAction.auto_approve: + # Set flag and resolve ALL pending approvals for this agent + task_lock.auto_approve[data.agent] = True + task_lock.resolve_all_approvals_for_agent( + data.agent, ApprovalAction.auto_approve + ) + elif data.approval == ApprovalAction.reject: + # Resolve ALL pending for this agent (skip-task cleanup handles rest) + task_lock.resolve_all_approvals_for_agent( + data.agent, ApprovalAction.reject + ) + else: + # approve_once: resolve only the specific request + task_lock.resolve_approval(data.approval_id, data.approval) + + chat_logger.debug("Approval processed", extra={"task_id": id}) + return Response(status_code=201) + + @router.post("/chat/{id}/install-mcp") def install_mcp(id: str, data: McpServers): chat_logger.info( diff --git a/backend/app/hitl/__init__.py b/backend/app/hitl/__init__.py new file mode 100644 index 000000000..fa7455a0c --- /dev/null +++ b/backend/app/hitl/__init__.py @@ -0,0 +1,13 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= diff --git a/backend/app/hitl/config.py b/backend/app/hitl/config.py new file mode 100644 index 000000000..47b7f3899 --- /dev/null +++ b/backend/app/hitl/config.py @@ -0,0 +1,27 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= + +from pydantic import BaseModel + +from app.model.enums import ApprovalAction + + +class HitlOptions(BaseModel): + terminal_approval: bool = False + + +class ApprovalRequest(BaseModel): + approval: ApprovalAction + agent: str + approval_id: str = "" diff --git a/backend/app/hitl/terminal_command.py b/backend/app/hitl/terminal_command.py new file mode 100644 index 000000000..b20a09dda --- /dev/null +++ b/backend/app/hitl/terminal_command.py @@ -0,0 +1,268 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= + +import os +import re + +# Dangerous commands that require user approval (issue #1306) +DANGEROUS_COMMAND_TOKENS = frozenset( + { + # System Administration + "sudo", + "su", + "reboot", + "shutdown", + "halt", + "poweroff", + "init", + # File System + "rm", + "chown", + "chgrp", + "chmod", + "umount", + "mount", + # Disk Operations + "dd", + "mkfs", + "fdisk", + "parted", + "fsck", + "mkswap", + "swapon", + "swapoff", + # Process Management + "service", + "systemctl", + "systemd", + "kill", + "pkill", + "killall", + # Network Configuration + "iptables", + "ip6tables", + "ifconfig", + "route", + "iptables-save", + # Cron/Scheduling + "crontab", + "at", + "batch", + # User/Kernel Management + "useradd", + "userdel", + "usermod", + "passwd", + "chpasswd", + "newgrp", + "modprobe", + "rmmod", + "insmod", + "lsmod", + } +) + +# Commands that wrap/prefix the real command +_WRAPPER_COMMANDS = frozenset( + { + "env", + "bash", + "sh", + "nohup", + "time", + "nice", + "command", + "exec", + "xargs", + } +) + +# Regex to split on shell operators: &&, ||, ;, | +# Note: this is intentionally naive about quoted strings — false positives +# (flagging safe commands) are acceptable for a safety check. +_SHELL_OPERATOR_RE = re.compile(r"\s*(?:&&|\|\||[;|\n])\s*") + +# Regex to detect heredoc start: < str: + """Remove heredoc body content from a command string. + + Heredoc bodies are stdin data, not shell commands, so they should + not be scanned for dangerous tokens. Only the shell command line + (before the ``< list[str]: + """Split a command string on shell operators (&&, ||, ;, |). + + Args: + command: Shell command string, possibly compound. + + Returns: + List of individual simple commands. + """ + return [ + part.strip() + for part in _SHELL_OPERATOR_RE.split(command) + if part.strip() + ] + + +def extract_effective_command(simple_command: str) -> str | None: + """Find the effective executable in a simple command. + + Strips wrapper commands (env, nohup, bash …) and path prefixes + to return the first real command token. + + Args: + simple_command: A single (non-compound) command string. + + Returns: + Basename of the effective command, or None if empty. + """ + parts = simple_command.strip().split() + if not parts: + return None + idx = 0 + while idx < len(parts): + token = parts[idx] + # Strip surrounding quotes: "rm -> rm + token = token.strip("\"'") + # Strip path prefix: /usr/bin/sudo -> sudo + basename = token.rsplit("/", 1)[-1] + if basename in _WRAPPER_COMMANDS: + idx += 1 + # Skip flags and their arguments (e.g. -c "cmd", -n 19), + # KEY=VALUE pairs (e.g. env FOO=bar), and pure numbers. + while idx < len(parts): + arg = parts[idx] + if arg.startswith("-"): + idx += 1 + # Short flags like -n, -c may take a value argument; + # skip the next non-flag token as the argument. + if ( + idx < len(parts) + and not parts[idx].startswith("-") + and len(arg) == 2 + ): + idx += 1 + elif _ENV_VAR_RE.match(arg): + idx += 1 + else: + break + continue + return basename + return None + + +def is_dangerous_command(command: str) -> bool: + """Check whether any sub-command is dangerous and requires approval. + + Splits on shell operators, strips heredoc bodies, then checks the + effective executable of each sub-command against DANGEROUS_COMMAND_TOKENS. + + Args: + command: Full shell command string, possibly compound. + + Returns: + True if at least one sub-command is dangerous. + """ + command = _strip_heredoc_bodies(command) + for sub_cmd in split_compound_command(command): + # First non-wrapper token, e.g. "env sudo rm -rf /" → "sudo" + effective = extract_effective_command(sub_cmd) + if effective and effective in DANGEROUS_COMMAND_TOKENS: + return True + return False + + +def validate_cd_within_working_dir( + command: str, working_directory: str +) -> tuple[bool, str | None]: + """Validate that no ``cd`` sub-command escapes working_directory. + + Args: + command: Full shell command string, possibly compound. + working_directory: Allowed root directory. + + Returns: + (True, None) if allowed, (False, error_message) if not. + """ + work_real = os.path.realpath(os.path.abspath(working_directory)) + current_dir = work_real + + for sub_cmd in split_compound_command(command): + parts = sub_cmd.strip().split() + if not parts: + continue + # Check if this sub-command is a cd + basename = parts[0].rsplit("/", 1)[-1] + if basename != "cd": + continue + target = parts[1] if len(parts) > 1 else "" + # cd with no args or "cd ~" -> home; treat as potential escape + if not target or target == "~": + target = os.path.expanduser("~") + elif target == "-": + # "cd -" is previous dir; cannot validate statically, allow it + continue + try: + if os.path.isabs(target): + resolved = os.path.realpath(os.path.abspath(target)) + else: + resolved = os.path.realpath( + os.path.abspath(os.path.join(current_dir, target)) + ) + if os.path.commonpath([resolved, work_real]) != work_real: + return ( + False, + f"cd not allowed: path would escape working directory " + f"({working_directory}).", + ) + current_dir = resolved + except (OSError, ValueError): + return False, "cd not allowed: invalid path." + return True, None diff --git a/backend/app/model/chat.py b/backend/app/model/chat.py index 8f13a5aa4..9f2d65ccb 100644 --- a/backend/app/model/chat.py +++ b/backend/app/model/chat.py @@ -21,7 +21,12 @@ from camel.types import ModelType, RoleType from pydantic import BaseModel, Field, field_validator -from app.model.enums import DEFAULT_SUMMARY_PROMPT, Status # noqa: F401 +from app.hitl.config import HitlOptions +from app.model.enums import ( # noqa: F401 + DEFAULT_SUMMARY_PROMPT, + ApprovalAction, + Status, +) from app.model.model_platform import ( NormalizedModelPlatform, NormalizedOptionalModelPlatform, @@ -65,6 +70,7 @@ class Chat(BaseModel): cdp_browsers: list[dict] = Field(default_factory=list) max_retries: int = 3 allow_local_system: bool = False + hitl_options: HitlOptions = HitlOptions() installed_mcp: McpServers = {"mcpServers": {}} bun_mirror: str = "" uvx_mirror: str = "" @@ -141,6 +147,7 @@ class SupplementChat(BaseModel): question: str task_id: str | None = None attaches: list[str] = [] + hitl_options: HitlOptions | None = None class HumanReply(BaseModel): diff --git a/backend/app/model/enums.py b/backend/app/model/enums.py index efd93b703..ac6f13569 100644 --- a/backend/app/model/enums.py +++ b/backend/app/model/enums.py @@ -22,6 +22,14 @@ class Status(str, Enum): done = "done" +class ApprovalAction(str, Enum): + """User response choices for HITL command approval prompts.""" + + approve_once = "approve_once" + auto_approve = "auto_approve" + reject = "reject" + + DEFAULT_SUMMARY_PROMPT = ( "After completing the task, please generate" " a summary of the entire task completion. " diff --git a/backend/app/service/chat_service.py b/backend/app/service/chat_service.py index 41c912ab6..1d314e582 100644 --- a/backend/app/service/chat_service.py +++ b/backend/app/service/chat_service.py @@ -1033,6 +1033,8 @@ async def run_decomposition(): # questions (don't break, don't # delete task_lock) elif item.action == Action.start: + # Reset per-agent auto-approve flags for the new task + task_lock.auto_approve = {} # Check conversation history length before starting task is_exceeded, total_length = check_conversation_history_length( task_lock @@ -1530,6 +1532,12 @@ def on_stream_text(chunk): "process_task_id": item.process_task_id, }, ) + elif item.action == Action.command_approval: + logger.info( + "[APPROVAL] SSE yielding command_approval event, data=%s", + item.data, + ) + yield sse_json("command_approval", item.data) elif item.action == Action.pause: if workforce is not None: workforce.pause() @@ -2388,7 +2396,6 @@ async def new_agent_model(data: NewAgent | ActionNewAgent, options: Chat): options.project_id, agent_name=data.name, working_directory=working_directory, - safe_mode=True, clone_current_env=True, ) tools.extend(terminal_toolkit.get_tools()) diff --git a/backend/app/service/task.py b/backend/app/service/task.py index 604fbc717..b84170d6b 100644 --- a/backend/app/service/task.py +++ b/backend/app/service/task.py @@ -26,13 +26,14 @@ from typing_extensions import TypedDict from app.exception.exception import ProgramException +from app.hitl.config import HitlOptions from app.model.chat import ( AgentModelConfig, McpServers, SupplementChat, UpdateData, ) -from app.model.enums import Status +from app.model.enums import ApprovalAction, Status logger = logging.getLogger("task_service") @@ -58,6 +59,7 @@ class Action(str, Enum): search_mcp = "search_mcp" # backend -> user install_mcp = "install_mcp" # backend -> user terminal = "terminal" # backend -> user + command_approval = "command_approval" # backend -> user (approval) end = "end" # backend -> user stop = "stop" # user -> backend supplement = "supplement" # user -> backend @@ -219,6 +221,19 @@ class ActionTerminalData(BaseModel): data: str +class ActionCommandApprovalData(BaseModel): + """SSE payload sent to the frontend to prompt for command approval. + + Args: + action: SSE event type (currently ``command_approval``). + data: Contains ``command`` (the shell command) and ``agent`` + (the agent name requesting approval). + """ + + action: Literal[Action.command_approval] = Action.command_approval + data: dict[Literal["command", "agent"], str] + + class ActionStopData(BaseModel): action: Literal[Action.stop] = Action.stop @@ -296,6 +311,7 @@ class ActionSkipTaskData(BaseModel): | ActionSearchMcpData | ActionInstallMcpData | ActionTerminalData + | ActionCommandApprovalData | ActionStopData | ActionEndData | ActionTimeoutData @@ -370,6 +386,17 @@ def __init__( self.question_agent = None self.current_task_id = None + # Human-in-the-loop settings (e.g. terminal command approval) + self.hitl_options: HitlOptions = HitlOptions() + + # Per-call Futures for user approval responses, keyed by approval_id. + # Each _request_user_approval call creates its own Future so + # concurrent calls from the same agent don't compete. + self.pending_approvals: dict[str, asyncio.Future[str]] = {} + # Per-agent auto-approve: skip further prompts for this agent. + # Reset at each task start (Action.start) in chat_service. + self.auto_approve: dict[str, bool] = {} + logger.info( "Task lock initialized", extra={"task_id": id, "created_at": self.created_at.isoformat()}, @@ -407,6 +434,103 @@ async def get_human_input(self, agent: str): ) return await self.human_input[agent].get() + def create_approval(self, approval_id: str) -> asyncio.Future[str]: + """Create a Future for a pending approval request. + + Args: + approval_id: Unique ID for this request (format: ``{agent}_{hex}``). + + Returns: + A Future that resolves with the user's approval action string. + """ + loop = asyncio.get_running_loop() + future = loop.create_future() + self.pending_approvals[approval_id] = future + logger.debug( + "Created approval future", + extra={"task_id": self.id, "approval_id": approval_id}, + ) + return future + + @staticmethod + def _set_future_result_if_pending( + future: asyncio.Future[str], action: str + ) -> None: + """Set future result if it is still pending.""" + if not future.done(): + future.set_result(action) + + def _resolve_future_threadsafe( + self, future: asyncio.Future[str], action: str + ) -> None: + """Resolve an approval future safely from any thread.""" + if future.done(): + return + + future.get_loop().call_soon_threadsafe( + self._set_future_result_if_pending, future, action + ) + + def resolve_approval(self, approval_id: str, action: str): + """Resolve a single pending approval by its ID. + + Design — "Approve Once": + The frontend sends the exact ``approval_id`` that arrived in the + SSE event. Only the one Future that matches is resolved; all + other concurrent approvals for the same agent remain pending and + will surface as the next item in the frontend queue. + + Args: + approval_id (str): Unique ID of the approval to resolve + (format: ``{agent}_{hex}``). + action (str): The approval action string (e.g. + ``ApprovalAction.approve_once``). + """ + future = self.pending_approvals.pop(approval_id, None) + if future: + self._resolve_future_threadsafe(future, action) + logger.debug( + "Resolved approval", + extra={ + "task_id": self.id, + "approval_id": approval_id, + "action": action, + }, + ) + + def resolve_all_approvals_for_agent(self, agent: str, action: str): + """Resolve all pending approvals for a given agent. + + Design — "Auto Approve" / "Reject": + Both actions apply to every pending command from this agent, not + just the one the user is looking at. + + - Auto Approve: sets ``auto_approve[agent] = True`` (caller's + responsibility) so future commands skip the prompt entirely, + then calls this method to unblock any coroutines already + suspended on ``await future``. + - Reject: calls this method to unblock suspended coroutines, then + the frontend issues a skip-task request to stop the whole task. + ``cleanup_for_stop`` will catch any Futures that slip through + between the two calls. + + The ``approval_id`` format ``{agent}_{hex}`` lets us filter by prefix. + + Args: + agent (str): Agent name whose pending approvals should all be + resolved (e.g. ``"developer_agent"``). + action (str): The approval action string to set on every matched + Future (e.g. ``ApprovalAction.auto_approve``). + """ + to_remove = [ + aid + for aid in self.pending_approvals + if aid.startswith(agent + "_") + ] + for aid in to_remove: + future = self.pending_approvals.pop(aid) + self._resolve_future_threadsafe(future, action) + def add_human_input_listen(self, agent: str): logger.debug( "Adding human input listener", @@ -435,6 +559,15 @@ async def cleanup(self): "background_tasks_count": len(self.background_tasks), }, ) + + # Unblock any coroutine awaiting approval (e.g. shell_exec + # waiting on a Future) by resolving all pending with "reject". + # This lets _request_user_approval return gracefully instead + # of hanging forever after the task is stopped. + for future in self.pending_approvals.values(): + self._resolve_future_threadsafe(future, ApprovalAction.reject) + self.pending_approvals.clear() + for task in list(self.background_tasks): if not task.done(): task.cancel() diff --git a/backend/app/utils/listen/toolkit_listen.py b/backend/app/utils/listen/toolkit_listen.py index 1c2c9bbc6..1df3dd4de 100644 --- a/backend/app/utils/listen/toolkit_listen.py +++ b/backend/app/utils/listen/toolkit_listen.py @@ -294,6 +294,12 @@ def batch_create(self, path: str) -> list: """ def decorator(func: Callable[..., Any]): + # `wrap` is the function whose metadata (name, signature, docstring) + # we copy onto the wrapper via @wraps. When auto_listen_toolkit + # decorates an overridden method it passes the *base class* method + # as wrap_method so that Camel's FunctionTool picks up the original + # signature. If the caller didn't supply wrap_method (e.g. a manual + # @listen_toolkit() on a new method), we just use func itself. wrap = func if wrap_method is None else wrap_method if iscoroutinefunction(func): @@ -355,6 +361,37 @@ async def async_wrapper(*args, **kwargs): return res async_wrapper.__listen_toolkit__ = True + + # Why this matters: + # @wraps(wrap) copies __wrapped__ from `wrap` onto the wrapper. + # When `wrap` is a *sync* base-class method (e.g. the original + # sync BaseTerminalToolkit.shell_exec) but `func` is an *async* + # override (e.g. our async TerminalToolkit.shell_exec), the + # wrapper ends up with __wrapped__ pointing to the sync base. + # + # Camel's FunctionTool uses inspect.unwrap() + iscoroutinefunction() + # to decide the dispatch path: + # - iscoroutinefunction → True → async_call (runs on main loop) + # - iscoroutinefunction → False → __call__ (persistent bg loop) + # + # If __wrapped__ points to the sync base, unwrap() returns it, + # iscoroutinefunction() returns False, and Camel dispatches the + # async function onto a *different* event loop — breaking any + # cross-loop asyncio.Queue await (e.g. the approval flow). + # + # Fix: override __wrapped__ to point to the actual async `func` + # so that Camel correctly dispatches via async_call on the main + # loop. The metadata (name, signature, docstring) is still + # preserved from `wrap` by @wraps; only the unwrap chain changes. + # + # This only triggers when wrap != func, i.e. when a subclass + # provides an async override of a sync base method — a pattern + # first introduced for TerminalToolkit.shell_exec to support + # the HITL approval flow (Camel's upstream shell_exec is + # sync-only with no async variant available). + if wrap is not func: + async_wrapper.__wrapped__ = func + return async_wrapper else: diff --git a/backend/tests/app/agent/toolkit/test_terminal_toolkit.py b/backend/tests/app/agent/toolkit/test_terminal_toolkit.py index dc060fba0..37bfe0fe5 100644 --- a/backend/tests/app/agent/toolkit/test_terminal_toolkit.py +++ b/backend/tests/app/agent/toolkit/test_terminal_toolkit.py @@ -15,11 +15,19 @@ import asyncio import threading import time +from unittest.mock import AsyncMock, patch import pytest from app.agent.toolkit.terminal_toolkit import TerminalToolkit -from app.service.task import TaskLock, task_locks +from app.hitl.config import HitlOptions +from app.model.enums import ApprovalAction +from app.service.task import ( + ActionCommandApprovalData, + Agents, + TaskLock, + task_locks, +) @pytest.mark.unit @@ -125,3 +133,750 @@ async def test_async_context(): ) else: raise + + +def _make_task_lock(task_id: str) -> TaskLock: + """Create a TaskLock and register it in the global dict.""" + tl = TaskLock(id=task_id, queue=asyncio.Queue(), human_input={}) + task_locks[task_id] = tl + return tl + + +def _make_action_data( + command: str = "rm -rf /", agent: str = "test_agent" +) -> ActionCommandApprovalData: + return ActionCommandApprovalData(data={"command": command, "agent": agent}) + + +async def _feed_approval( + tl: TaskLock, action: str +) -> ActionCommandApprovalData: + """Read one approval event from the SSE queue and resolve it. + + Simulates the frontend receiving the SSE ``command_approval`` event + and responding with the given *action*. Returns the SSE item for + optional assertions. + """ + sse_item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + approval_id = sse_item.data["approval_id"] + tl.resolve_approval(approval_id, action) + return sse_item + + +class _ConcreteToolkit(TerminalToolkit): + """Lightweight subclass that skips the heavy TerminalToolkit.__init__.""" + + def __init__(self, api_task_id: str, agent_name: str = "test_agent"): + self.api_task_id = api_task_id + self.agent_name = agent_name + + +@pytest.mark.unit +class TestRequestUserApproval: + """Tests for TerminalToolkit._request_user_approval.""" + + @pytest.mark.asyncio + async def test_approve_once_returns_none(self): + """approve_once should let the operation proceed (return None).""" + task_id = "approval_test_once" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.approve_once), + ) + assert result is None + + @pytest.mark.asyncio + async def test_auto_approve_returns_none_and_sets_flag(self): + """auto_approve should proceed and set the flag for future calls.""" + task_id = "approval_test_auto" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.auto_approve), + ) + assert result is None + assert tl.auto_approve["test_agent"] is True + + @pytest.mark.asyncio + async def test_auto_approve_skips_subsequent_prompts(self): + """Once auto_approve is set, subsequent calls skip the Future entirely.""" + task_id = "approval_test_auto_skip" + tl = _make_task_lock(task_id) + tl.auto_approve["test_agent"] = True + toolkit = _ConcreteToolkit(task_id) + + # Should NOT block because auto_approve bypasses the Future + result = await toolkit._request_user_approval(_make_action_data()) + assert result is None + + @pytest.mark.asyncio + async def test_reject_returns_error_message(self): + """reject should return an error string (task will be stopped by frontend).""" + task_id = "approval_test_reject" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.reject), + ) + assert result is not None + assert "rejected" in result.lower() + + @pytest.mark.asyncio + async def test_unknown_value_treated_as_reject(self): + """Unrecognised approval values should be treated as rejection (fail-closed).""" + task_id = "approval_test_unknown" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, "some_garbage_value"), + ) + assert result is not None + assert "rejected" in result.lower() + + @pytest.mark.asyncio + async def test_pushes_action_data_to_sse_queue(self): + """_request_user_approval should push action_data to the SSE queue.""" + task_id = "approval_test_sse" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + action_data = _make_action_data("echo danger") + + async def feed_and_verify(): + sse_item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + # The SSE item should be the same object we passed in + assert sse_item is action_data + # approval_id should have been injected + assert "approval_id" in sse_item.data + tl.resolve_approval( + sse_item.data["approval_id"], ApprovalAction.approve_once + ) + return sse_item + + result, sse_item = await asyncio.gather( + toolkit._request_user_approval(action_data), + feed_and_verify(), + ) + assert result is None + assert sse_item is action_data + + @pytest.mark.asyncio + async def test_concurrent_approvals_isolated(self): + """Two agents sharing the same TaskLock have independent approval + Futures — each agent's approval is routed to the correct Future. + """ + task_id = "approval_test_concurrent" + tl = _make_task_lock(task_id) + + agent_a = _ConcreteToolkit(task_id, "agent_a") + agent_b = _ConcreteToolkit(task_id, "agent_b") + + results = {} + + async def request_a(): + results["a"] = await agent_a._request_user_approval( + _make_action_data("rm -rf /", agent="agent_a") + ) + + async def request_b(): + results["b"] = await agent_b._request_user_approval( + _make_action_data("drop table users", agent="agent_b") + ) + + async def feed_responses(): + items = {} + for _ in range(2): + item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + items[item.data["agent"]] = item.data["approval_id"] + # Approve agent_b, reject agent_a (opposite of insertion order) + tl.resolve_approval(items["agent_b"], ApprovalAction.approve_once) + tl.resolve_approval(items["agent_a"], ApprovalAction.reject) + + await asyncio.gather(request_a(), request_b(), feed_responses()) + + # Agent A got reject + assert results["a"] is not None + assert "rejected" in results["a"].lower() + # Agent B got approve + assert results["b"] is None + + @pytest.mark.asyncio + async def test_three_agents_mixed_responses(self): + """Three agents get approve, reject, auto_approve respectively.""" + task_id = "approval_test_three" + tl = _make_task_lock(task_id) + + dev = _ConcreteToolkit(task_id, "developer_agent") + browser = _ConcreteToolkit(task_id, "browser_agent") + doc = _ConcreteToolkit(task_id, "document_agent") + + results = {} + + async def req_dev(): + results["dev"] = await dev._request_user_approval( + _make_action_data("rm -rf /tmp", agent="developer_agent") + ) + + async def req_browser(): + results["browser"] = await browser._request_user_approval( + _make_action_data("sudo apt update", agent="browser_agent") + ) + + async def req_doc(): + results["doc"] = await doc._request_user_approval( + _make_action_data("kill -9 1", agent="document_agent") + ) + + async def feed(): + items = {} + for _ in range(3): + item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + items[item.data["agent"]] = item.data["approval_id"] + tl.resolve_approval(items["browser_agent"], ApprovalAction.reject) + tl.resolve_approval( + items["document_agent"], ApprovalAction.auto_approve + ) + tl.resolve_approval( + items["developer_agent"], ApprovalAction.approve_once + ) + + await asyncio.gather(req_dev(), req_browser(), req_doc(), feed()) + + assert results["dev"] is None # approved + assert results["browser"] is not None # rejected + assert "rejected" in results["browser"].lower() + assert results["doc"] is None # auto-approved + + @pytest.mark.asyncio + async def test_auto_approve_is_per_agent(self): + """auto_approve for one agent does NOT affect another.""" + task_id = "approval_test_per_agent_auto" + tl = _make_task_lock(task_id) + + agent_a = _ConcreteToolkit(task_id, "agent_a") + agent_b = _ConcreteToolkit(task_id, "agent_b") + + # Auto-approve agent_a via response + result_a, _ = await asyncio.gather( + agent_a._request_user_approval( + _make_action_data("rm -rf /", agent="agent_a") + ), + _feed_approval(tl, ApprovalAction.auto_approve), + ) + assert result_a is None + assert tl.auto_approve.get("agent_a") is True + + # agent_a now skips entirely (auto-approved) + result_a2 = await agent_a._request_user_approval( + _make_action_data("sudo reboot", agent="agent_a") + ) + assert result_a2 is None + + # agent_b is NOT auto-approved — it must still wait on a Future + assert tl.auto_approve.get("agent_b", False) is False + result_b, _ = await asyncio.gather( + agent_b._request_user_approval( + _make_action_data("sudo rm -rf /", agent="agent_b") + ), + _feed_approval(tl, ApprovalAction.reject), + ) + assert result_b is not None + assert "rejected" in result_b.lower() + + @pytest.mark.asyncio + async def test_auto_approve_survives_dict_reset(self): + """Resetting auto_approve to {} (as chat_service does on Action.start) + must not break subsequent approval calls. + + Regression: chat_service.py previously reset auto_approve = False + (a bool), causing ``task_lock.auto_approve.get(...)`` to raise + ``AttributeError: 'bool' object has no attribute 'get'``. + """ + task_id = "approval_test_reset" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + # 1. Grant auto_approve for this agent + r1, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.auto_approve), + ) + assert r1 is None + assert tl.auto_approve["test_agent"] is True + + # 2. Simulate the reset that chat_service does on Action.start + tl.auto_approve = {} + + # 3. auto_approve flag is cleared — agent must wait on Future again + assert tl.auto_approve.get("test_agent", False) is False + + # 4. Must wait on Future again + r2, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.approve_once), + ) + assert r2 is None + + @pytest.mark.asyncio + async def test_multiple_sequential_approvals_same_agent(self): + """One agent can request approval multiple times sequentially.""" + task_id = "approval_test_sequential" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id, "dev") + + # First command: approved + r1, _ = await asyncio.gather( + toolkit._request_user_approval( + _make_action_data("rm /tmp/a", agent="dev") + ), + _feed_approval(tl, ApprovalAction.approve_once), + ) + assert r1 is None + + # Second command: rejected + r2, _ = await asyncio.gather( + toolkit._request_user_approval( + _make_action_data("rm /tmp/b", agent="dev") + ), + _feed_approval(tl, ApprovalAction.reject), + ) + assert r2 is not None + + # Third command: approved again + r3, _ = await asyncio.gather( + toolkit._request_user_approval( + _make_action_data("rm /tmp/c", agent="dev") + ), + _feed_approval(tl, ApprovalAction.approve_once), + ) + assert r3 is None + + @pytest.mark.asyncio + async def test_concurrent_same_agent_approve_all(self): + """Multiple concurrent approvals from same agent resolve independently. + + This tests the core bug fix: when the same agent triggers multiple + dangerous commands concurrently, each gets its own Future and can + be approved independently. + """ + task_id = "concurrent_same_agent_all" + tl = _make_task_lock(task_id) + + toolkits = [_ConcreteToolkit(task_id, "dev_agent") for _ in range(3)] + results = {} + + async def request(idx): + results[idx] = await toolkits[idx]._request_user_approval( + _make_action_data(f"cmd_{idx}", agent="dev_agent") + ) + + async def feed_all(): + for _ in range(3): + item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + tl.resolve_approval( + item.data["approval_id"], ApprovalAction.approve_once + ) + + await asyncio.gather(request(0), request(1), request(2), feed_all()) + + assert all(results[i] is None for i in range(3)) + + @pytest.mark.asyncio + async def test_concurrent_same_agent_auto_approve_unblocks_siblings(self): + """Auto-approving one request unblocks all pending from the same agent. + + This tests the key scenario: 3 subtasks from the same agent all + need approval, user clicks auto-approve on the first one, the + other 2 should resolve automatically. + """ + task_id = "concurrent_auto_unblock" + tl = _make_task_lock(task_id) + + toolkits = [_ConcreteToolkit(task_id, "dev_agent") for _ in range(3)] + results = {} + + async def request(idx): + results[idx] = await toolkits[idx]._request_user_approval( + _make_action_data(f"cmd_{idx}", agent="dev_agent") + ) + + async def feed_auto(): + # Wait for all 3 to push to the SSE queue + for _ in range(3): + await asyncio.wait_for(tl.queue.get(), timeout=2.0) + # Auto-approve ONE — the toolkit code will + # call resolve_all_approvals_for_agent to resolve the rest + approval_ids = list(tl.pending_approvals.keys()) + tl.resolve_approval(approval_ids[0], ApprovalAction.auto_approve) + + await asyncio.gather(request(0), request(1), request(2), feed_auto()) + + assert all(results[i] is None for i in range(3)) + assert tl.auto_approve.get("dev_agent") is True + + @pytest.mark.asyncio + async def test_approval_id_uses_enum_value_not_repr(self): + """approval_id must use the Enum *value* (e.g. ``developer_agent``), + not the repr (``Agents.developer_agent``). + + Regression: f-string formatting of a ``str, Enum`` member calls + ``__format__`` which returns ``Agents.developer_agent``. String + concatenation (``+``) correctly uses the underlying str value. + If the prefix mismatches, ``resolve_all_approvals_for_agent`` + silently matches nothing and all pending Futures hang forever. + """ + task_id = "approval_id_enum_value" + tl = _make_task_lock(task_id) + # Use the real Agents enum — the exact type used in production + toolkit = _ConcreteToolkit(task_id, Agents.developer_agent) + + action_data = _make_action_data( + "rm -rf /tmp/test", agent="developer_agent" + ) + + async def verify_prefix(): + sse_item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + approval_id = sse_item.data["approval_id"] + # Must start with the plain value, NOT "Agents.developer_agent_" + assert approval_id.startswith("developer_agent_"), ( + f"approval_id {approval_id!r} has wrong prefix — " + "Enum __format__ was used instead of str value" + ) + assert not approval_id.startswith("Agents."), ( + f"approval_id {approval_id!r} contains Enum class name" + ) + tl.resolve_approval(approval_id, ApprovalAction.approve_once) + + result, _ = await asyncio.gather( + toolkit._request_user_approval(action_data), + verify_prefix(), + ) + assert result is None + + @pytest.mark.asyncio + async def test_auto_approve_with_enum_agent_name(self): + """Auto-approve must work when agent_name is an Agents enum member. + + Regression: resolve_all_approvals_for_agent uses + ``aid.startswith(agent + "_")`` which relies on str concatenation + producing the enum value. If approval_id was built with an + f-string, the prefix would be ``Agents.developer_agent_`` while + the lookup would search for ``developer_agent_`` — no match. + """ + task_id = "auto_approve_enum" + tl = _make_task_lock(task_id) + + toolkits = [ + _ConcreteToolkit(task_id, Agents.developer_agent) for _ in range(3) + ] + results = {} + + async def request(idx): + results[idx] = await toolkits[idx]._request_user_approval( + _make_action_data(f"cmd_{idx}", agent="developer_agent") + ) + + async def feed_auto(): + for _ in range(3): + await asyncio.wait_for(tl.queue.get(), timeout=2.0) + # Simulate what the controller does: resolve with the plain + # string agent name (as received from the frontend JSON). + tl.resolve_all_approvals_for_agent( + "developer_agent", ApprovalAction.auto_approve + ) + + await asyncio.gather(request(0), request(1), request(2), feed_auto()) + + assert all(results[i] is None for i in range(3)) + + @pytest.mark.asyncio + async def test_controller_resolve_matches_toolkit_approval_id(self): + """The controller's resolve path must match the toolkit's approval_id. + + End-to-end: toolkit creates approval_id with Enum agent_name, + controller resolves with plain string agent name from frontend. + Both ``resolve_approval`` (approve_once) and + ``resolve_all_approvals_for_agent`` (auto/reject) must work. + """ + task_id = "controller_resolve_match" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id, Agents.developer_agent) + + # --- approve_once path: exact approval_id round-trip --- + action_data = _make_action_data("rm /x", agent="developer_agent") + + async def feed_approve_once(): + sse_item = await asyncio.wait_for(tl.queue.get(), timeout=2.0) + # Controller receives this exact approval_id from the frontend + tl.resolve_approval( + sse_item.data["approval_id"], ApprovalAction.approve_once + ) + + r1, _ = await asyncio.gather( + toolkit._request_user_approval(action_data), + feed_approve_once(), + ) + assert r1 is None + + # --- reject path: bulk resolve by agent string --- + action_data2 = _make_action_data("rm /y", agent="developer_agent") + + async def feed_reject(): + await asyncio.wait_for(tl.queue.get(), timeout=2.0) + # Controller sends plain string "developer_agent" from JSON + tl.resolve_all_approvals_for_agent( + "developer_agent", ApprovalAction.reject + ) + + r2, _ = await asyncio.gather( + toolkit._request_user_approval(action_data2), + feed_reject(), + ) + assert r2 is not None + assert "rejected" in r2.lower() + + +@pytest.mark.unit +class TestTerminalApprovalGating: + """Tests that hitl_options.terminal_approval controls whether approval is requested.""" + + @pytest.mark.asyncio + async def test_terminal_approval_off_skips_approval(self): + """When terminal_approval=False, dangerous commands run without approval.""" + task_id = "approval_off_test" + _make_task_lock(task_id) # default: terminal_approval=False + toolkit = TerminalToolkit(task_id, "test_agent") + + with ( + patch.object( + toolkit, "_request_user_approval", new_callable=AsyncMock + ) as mock_approval, + patch( + "app.agent.toolkit.terminal_toolkit.TerminalToolkit.shell_exec", + return_value="done", + ), + ): + assert toolkit._get_terminal_approval() is False + mock_approval.assert_not_called() + + @pytest.mark.asyncio + async def test_terminal_approval_on_triggers_approval(self): + """When terminal_approval=True, dangerous commands trigger approval.""" + task_id = "approval_on_test" + tl = _make_task_lock(task_id) + tl.hitl_options = HitlOptions(terminal_approval=True) + toolkit = TerminalToolkit(task_id, "test_agent") + + from app.hitl.terminal_command import is_dangerous_command + + assert toolkit._get_terminal_approval() is True + assert is_dangerous_command("rm -rf /tmp/test") is True + + def test_terminal_approval_default_is_false(self): + """TerminalToolkit defaults to terminal_approval=False.""" + task_id = "approval_default_test" + _make_task_lock(task_id) + toolkit = TerminalToolkit(task_id, "test_agent") + assert toolkit._get_terminal_approval() is False + + def test_terminal_approval_reads_from_task_lock(self): + """TerminalToolkit reads terminal_approval from TaskLock on the fly.""" + task_id = "approval_read_test" + tl = _make_task_lock(task_id) + toolkit = TerminalToolkit(task_id, "test_agent") + assert toolkit._get_terminal_approval() is False + # Change setting after init — should reflect immediately + tl.hitl_options = HitlOptions(terminal_approval=True) + assert toolkit._get_terminal_approval() is True + + def test_terminal_approval_false_never_detects_dangerous(self): + """With terminal_approval=False, the is_dangerous check is skipped.""" + task_id = "approval_skip_test" + _make_task_lock(task_id) + toolkit = TerminalToolkit(task_id, "test_agent") + + from app.hitl.terminal_command import is_dangerous_command + + # The command IS dangerous... + assert is_dangerous_command("rm -rf /") is True + # ...but the gating logic would produce False + terminal_approval = toolkit._get_terminal_approval() + is_dangerous = ( + is_dangerous_command("rm -rf /") if terminal_approval else False + ) + assert is_dangerous is False + + +@pytest.mark.unit +class TestFollowUpTaskApproval: + """Tests for HITL approval behaviour across follow-up tasks. + + When a user sends a follow-up question in the same project the backend + receives a ``supplement`` action that may carry updated ``hitl_options``. + These tests verify that changing the setting between tasks takes effect + immediately — the bug fixed by ``_get_terminal_approval()`` reading from + ``task_lock`` on the fly rather than caching the value in ``__init__``. + """ + + def test_enable_approval_between_tasks(self): + """Turning approval ON after task 1 should gate task 2 commands.""" + task_id = "followup_enable" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + # Task 1: approval is OFF (default) + assert toolkit._get_terminal_approval() is False + + # User navigates to settings and enables approval before task 2 + tl.hitl_options = HitlOptions(terminal_approval=True) + + # Task 2: same toolkit instance, but setting now reads True + assert toolkit._get_terminal_approval() is True + + def test_disable_approval_between_tasks(self): + """Turning approval OFF after task 1 should let commands run freely.""" + task_id = "followup_disable" + tl = _make_task_lock(task_id) + tl.hitl_options = HitlOptions(terminal_approval=True) + toolkit = _ConcreteToolkit(task_id) + + # Task 1: approval is ON + assert toolkit._get_terminal_approval() is True + + # User disables approval before task 2 + tl.hitl_options = HitlOptions(terminal_approval=False) + + # Task 2: setting now reads False + assert toolkit._get_terminal_approval() is False + + def test_safe_mode_synced_on_toggle(self): + """safe_mode on the Camel base class must stay in sync with the toggle.""" + task_id = "followup_safe_mode" + tl = _make_task_lock(task_id) + toolkit = TerminalToolkit(task_id, "test_agent") + + # Default: approval OFF → safe_mode ON + assert toolkit._get_terminal_approval() is False + assert toolkit.safe_mode is True + + # Enable approval → safe_mode OFF + tl.hitl_options = HitlOptions(terminal_approval=True) + assert toolkit._get_terminal_approval() is True + assert toolkit.safe_mode is False + + # Disable again → safe_mode ON + tl.hitl_options = HitlOptions(terminal_approval=False) + assert toolkit._get_terminal_approval() is False + assert toolkit.safe_mode is True + + def test_auto_approve_reset_between_tasks(self): + """auto_approve flags must be cleared when a new task starts.""" + task_id = "followup_auto_reset" + tl = _make_task_lock(task_id) + toolkit = _ConcreteToolkit(task_id) + + # Task 1: agent gets auto-approved + tl.auto_approve["test_agent"] = True + assert tl.auto_approve.get("test_agent") is True + + # Simulate Action.start for task 2 — chat_service resets auto_approve + tl.auto_approve = {} + + # Task 2: auto_approve should be cleared + assert tl.auto_approve.get("test_agent", False) is False + + @pytest.mark.asyncio + async def test_approval_required_after_setting_enabled_mid_session(self): + """Full flow: approval OFF in task 1, ON in task 2, command triggers approval.""" + task_id = "followup_full_flow" + tl = _make_task_lock(task_id) + tl.hitl_options = HitlOptions(terminal_approval=False) + toolkit = _ConcreteToolkit(task_id) + + from app.hitl.terminal_command import is_dangerous_command + + # Task 1: dangerous command NOT gated + terminal_approval = toolkit._get_terminal_approval() + is_dangerous = ( + is_dangerous_command("rm -rf /tmp/data") + if terminal_approval + else False + ) + assert is_dangerous is False + + # User enables approval before task 2 + tl.hitl_options = HitlOptions(terminal_approval=True) + # Reset auto_approve as chat_service would + tl.auto_approve = {} + + # Task 2: same command IS now gated + terminal_approval = toolkit._get_terminal_approval() + is_dangerous = ( + is_dangerous_command("rm -rf /tmp/data") + if terminal_approval + else False + ) + assert is_dangerous is True + + # Approval flow works correctly + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.approve_once), + ) + assert result is None + + @pytest.mark.asyncio + async def test_reject_after_setting_enabled_mid_session(self): + """Rejection still works when approval is enabled between tasks.""" + task_id = "followup_reject" + tl = _make_task_lock(task_id) + tl.hitl_options = HitlOptions(terminal_approval=False) + toolkit = _ConcreteToolkit(task_id) + + # Enable approval before task 2 + tl.hitl_options = HitlOptions(terminal_approval=True) + + assert toolkit._get_terminal_approval() is True + + result, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.reject), + ) + assert result is not None + assert "rejected" in result.lower() + + @pytest.mark.asyncio + async def test_auto_approve_does_not_carry_over_to_next_task(self): + """auto_approve granted in task 1 must not persist into task 2.""" + task_id = "followup_auto_no_carry" + tl = _make_task_lock(task_id) + tl.hitl_options = HitlOptions(terminal_approval=True) + toolkit = _ConcreteToolkit(task_id) + + # Task 1: grant auto_approve + r1, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.auto_approve), + ) + assert r1 is None + assert tl.auto_approve["test_agent"] is True + + # Simulate new task start — reset auto_approve + tl.auto_approve = {} + + # Task 2: auto_approve cleared, must wait on Future again + r2, _ = await asyncio.gather( + toolkit._request_user_approval(_make_action_data()), + _feed_approval(tl, ApprovalAction.reject), + ) + assert r2 is not None + assert "rejected" in r2.lower() diff --git a/backend/tests/app/controller/test_chat_controller.py b/backend/tests/app/controller/test_chat_controller.py index 5366b6dc3..39480024c 100644 --- a/backend/tests/app/controller/test_chat_controller.py +++ b/backend/tests/app/controller/test_chat_controller.py @@ -22,6 +22,7 @@ from pydantic import ValidationError from app.controller.chat_controller import ( + approval, human_reply, improve, install_mcp, @@ -30,7 +31,15 @@ supplement, ) from app.exception.exception import UserException -from app.model.chat import Chat, HumanReply, McpServers, Status, SupplementChat +from app.hitl.config import ApprovalRequest +from app.model.chat import ( + Chat, + HumanReply, + McpServers, + Status, + SupplementChat, +) +from app.model.enums import ApprovalAction @pytest.mark.unit @@ -238,6 +247,67 @@ def test_install_mcp_success(self, mock_task_lock): assert response.status_code == 201 mock_run.assert_called_once() + def test_approval_success(self, mock_task_lock): + """Test successful approval endpoint (approve_once resolves one Future).""" + task_id = "test_task_123" + request_data = ApprovalRequest( + approval=ApprovalAction.approve_once, + agent="dev_agent", + approval_id="dev_agent_abc123", + ) + + with patch( + "app.controller.chat_controller.get_task_lock", + return_value=mock_task_lock, + ): + response = approval(task_id, request_data) + + assert isinstance(response, Response) + assert response.status_code == 201 + mock_task_lock.resolve_approval.assert_called_once_with( + "dev_agent_abc123", ApprovalAction.approve_once + ) + + def test_approval_reject(self, mock_task_lock): + """Test approval endpoint with reject (resolves ALL for this agent).""" + task_id = "test_task_123" + request_data = ApprovalRequest( + approval=ApprovalAction.reject, agent="dev_agent" + ) + + with patch( + "app.controller.chat_controller.get_task_lock", + return_value=mock_task_lock, + ): + response = approval(task_id, request_data) + + assert isinstance(response, Response) + assert response.status_code == 201 + mock_task_lock.resolve_all_approvals_for_agent.assert_called_once_with( + "dev_agent", ApprovalAction.reject + ) + + def test_approval_auto_approve(self, mock_task_lock): + """Test approval endpoint with auto_approve (sets flag + resolves ALL).""" + task_id = "test_task_123" + mock_task_lock.auto_approve = {} + request_data = ApprovalRequest( + approval=ApprovalAction.auto_approve, agent="dev_agent" + ) + + with patch( + "app.controller.chat_controller.get_task_lock", + return_value=mock_task_lock, + ): + response = approval(task_id, request_data) + + assert isinstance(response, Response) + assert response.status_code == 201 + assert mock_task_lock.auto_approve["dev_agent"] is True + mock_task_lock.resolve_all_approvals_for_agent.assert_called_once_with( + "dev_agent", ApprovalAction.auto_approve + ) + @pytest.mark.integration class TestChatControllerIntegration: @@ -371,6 +441,44 @@ def test_install_mcp_endpoint_integration(self, client: TestClient): assert response.status_code == 201 + def test_approval_endpoint_integration(self, client: TestClient): + """Test approval endpoint through FastAPI test client.""" + task_id = "test_task_123" + approval_data = { + "approval": "approve_once", + "agent": "dev_agent", + "approval_id": "dev_agent_abc123", + } + + with patch( + "app.controller.chat_controller.get_task_lock" + ) as mock_get_lock: + mock_task_lock = MagicMock() + mock_get_lock.return_value = mock_task_lock + + response = client.post( + f"/chat/{task_id}/approval", json=approval_data + ) + + assert response.status_code == 201 + + def test_approval_endpoint_reject_integration(self, client: TestClient): + """Test approval endpoint with reject through FastAPI test client.""" + task_id = "test_task_123" + approval_data = {"approval": "reject", "agent": "dev_agent"} + + with patch( + "app.controller.chat_controller.get_task_lock" + ) as mock_get_lock: + mock_task_lock = MagicMock() + mock_get_lock.return_value = mock_task_lock + + response = client.post( + f"/chat/{task_id}/approval", json=approval_data + ) + + assert response.status_code == 201 + @pytest.mark.model_backend class TestChatControllerWithLLM: diff --git a/backend/tests/app/hitl/__init__.py b/backend/tests/app/hitl/__init__.py new file mode 100644 index 000000000..fa7455a0c --- /dev/null +++ b/backend/tests/app/hitl/__init__.py @@ -0,0 +1,13 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= diff --git a/backend/tests/app/hitl/test_terminal_command.py b/backend/tests/app/hitl/test_terminal_command.py new file mode 100644 index 000000000..07a5f3ef5 --- /dev/null +++ b/backend/tests/app/hitl/test_terminal_command.py @@ -0,0 +1,434 @@ +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= + + +from app.hitl.terminal_command import ( + _strip_heredoc_bodies, + extract_effective_command, + is_dangerous_command, + split_compound_command, + validate_cd_within_working_dir, +) + +# --- split_compound_command --- + + +def test_split_compound_simple(): + assert split_compound_command("ls -la") == ["ls -la"] + + +def test_split_compound_and(): + result = split_compound_command("echo foo && rm -rf /") + assert len(result) == 2 + assert result[0] == "echo foo" + assert result[1] == "rm -rf /" + + +def test_split_compound_or(): + result = split_compound_command("ls || sudo reboot") + assert len(result) == 2 + + +def test_split_compound_semicolon(): + result = split_compound_command("echo hello; rm -rf /") + assert len(result) == 2 + + +def test_split_compound_pipe(): + result = split_compound_command("cat file | sudo tee /etc/passwd") + assert len(result) == 2 + + +def test_split_compound_empty(): + assert split_compound_command("") == [] + + +def test_split_compound_whitespace(): + assert split_compound_command(" ") == [] + + +# --- extract_effective_command --- + + +def test_extract_simple(): + assert extract_effective_command("rm -rf /") == "rm" + + +def test_extract_with_path_prefix(): + assert extract_effective_command("/usr/bin/rm -rf /") == "rm" + + +def test_extract_with_sbin_path(): + assert extract_effective_command("/sbin/reboot") == "reboot" + + +def test_extract_wrapper_env(): + assert extract_effective_command("env rm -rf /") == "rm" + + +def test_extract_wrapper_bash_c(): + # extract_effective_command uses str.split() which cannot handle shell + # quoting, so `bash -c "rm -rf /"` is mis-parsed. This is acceptable + # because is_dangerous_command (which uses all-token scanning) catches it. + assert extract_effective_command('bash -c "rm -rf /"') is not None + + +def test_extract_wrapper_nohup(): + assert extract_effective_command("nohup sudo shutdown") == "sudo" + + +def test_extract_wrapper_time(): + assert extract_effective_command("time rm -rf /tmp/data") == "rm" + + +def test_extract_wrapper_nice(): + assert extract_effective_command("nice -n 19 dd if=/dev/zero") == "dd" + + +def test_extract_wrapper_command(): + assert extract_effective_command("command sudo reboot") == "sudo" + + +def test_extract_nested_wrappers(): + assert extract_effective_command("env nohup sudo rm -rf /") == "sudo" + + +def test_extract_env_with_var(): + assert extract_effective_command("env FOO=bar rm -rf /") == "rm" + + +def test_extract_safe_command(): + assert extract_effective_command("ls -la") == "ls" + + +def test_extract_empty(): + assert extract_effective_command("") is None + + +def test_extract_whitespace(): + assert extract_effective_command(" ") is None + + +# --- is_dangerous_command --- + + +def test_dangerous_simple_rm(): + assert is_dangerous_command("rm -rf /") is True + + +def test_dangerous_simple_sudo(): + assert is_dangerous_command("sudo apt update") is True + + +def test_dangerous_kill(): + assert is_dangerous_command("kill -9 1234") is True + + +def test_dangerous_pkill(): + assert is_dangerous_command("pkill python") is True + + +def test_dangerous_killall(): + assert is_dangerous_command("killall node") is True + + +def test_dangerous_chmod(): + assert is_dangerous_command("chmod 777 /etc/passwd") is True + + +def test_dangerous_with_path_prefix(): + assert is_dangerous_command("/usr/bin/sudo ls") is True + + +def test_dangerous_env_wrapper(): + assert is_dangerous_command("env rm -rf /") is True + + +def test_dangerous_bash_c_wrapper(): + # bash -c "rm -rf /" — extract_effective_command cannot parse + # shell quoting so the inner "rm" is not detected. This is a + # known limitation; in practice agents rarely use this pattern. + assert is_dangerous_command('bash -c "rm -rf /"') is False + + +def test_dangerous_nohup_wrapper(): + assert is_dangerous_command("nohup sudo shutdown -h now") is True + + +def test_dangerous_compound_and(): + assert is_dangerous_command("echo foo && rm -rf /") is True + + +def test_dangerous_compound_or(): + assert is_dangerous_command("ls || sudo reboot") is True + + +def test_dangerous_compound_semicolon(): + assert is_dangerous_command("echo hello; rm -rf /") is True + + +def test_dangerous_compound_pipe(): + assert is_dangerous_command("cat file | sudo tee /etc/passwd") is True + + +def test_dangerous_first_safe_second_dangerous(): + assert is_dangerous_command("cd /tmp && rm -rf /") is True + + +def test_safe_simple_ls(): + assert is_dangerous_command("ls -la") is False + + +def test_safe_compound(): + assert is_dangerous_command("echo hello && ls -la") is False + + +def test_safe_env_with_safe_command(): + assert is_dangerous_command("env python script.py") is False + + +def test_safe_empty(): + assert is_dangerous_command("") is False + + +def test_safe_whitespace(): + assert is_dangerous_command(" ") is False + + +# --- validate_cd_within_working_dir --- + + +def test_cd_within_dir_allowed(tmp_path): + sub = tmp_path / "sub" + sub.mkdir() + ok, err = validate_cd_within_working_dir(f"cd {sub}", str(tmp_path)) + assert ok is True + assert err is None + + +def test_cd_escape_rejected(tmp_path): + ok, err = validate_cd_within_working_dir("cd /tmp", str(tmp_path)) + assert ok is False + assert "escape" in err.lower() + + +def test_cd_parent_traversal_rejected(tmp_path): + ok, err = validate_cd_within_working_dir("cd ../..", str(tmp_path)) + assert ok is False + + +def test_cd_no_args_rejected(tmp_path): + # cd with no args goes to home, which is outside tmp_path + ok, err = validate_cd_within_working_dir("cd", str(tmp_path)) + assert ok is False + + +def test_cd_tilde_rejected(tmp_path): + ok, err = validate_cd_within_working_dir("cd ~", str(tmp_path)) + assert ok is False + + +def test_cd_dash_allowed(tmp_path): + ok, err = validate_cd_within_working_dir("cd -", str(tmp_path)) + assert ok is True + assert err is None + + +def test_non_cd_command_allowed(tmp_path): + ok, err = validate_cd_within_working_dir("ls -la", str(tmp_path)) + assert ok is True + assert err is None + + +def test_cd_compound_second_escapes(tmp_path): + sub = tmp_path / "sub" + sub.mkdir() + ok, err = validate_cd_within_working_dir( + f"cd {sub} && cd /tmp", str(tmp_path) + ) + assert ok is False + + +def test_cd_compound_all_within_dir(tmp_path): + sub1 = tmp_path / "a" + sub2 = tmp_path / "b" + sub1.mkdir() + sub2.mkdir() + ok, err = validate_cd_within_working_dir( + f"cd {sub1} && cd {sub2}", str(tmp_path) + ) + assert ok is True + assert err is None + + +def test_cd_compound_relative_progression_allowed(tmp_path): + """Relative cd commands should be evaluated from the updated cwd.""" + sub = tmp_path / "sub" + sub.mkdir() + ok, err = validate_cd_within_working_dir("cd sub && cd ..", str(tmp_path)) + assert ok is True + assert err is None + + +def test_cd_relative_within_dir(tmp_path): + sub = tmp_path / "sub" + sub.mkdir() + ok, err = validate_cd_within_working_dir("cd sub", str(tmp_path)) + assert ok is True + assert err is None + + +def test_cd_dot_stays_in_dir(tmp_path): + ok, err = validate_cd_within_working_dir("cd .", str(tmp_path)) + assert ok is True + assert err is None + + +def test_cd_symlink_escape_rejected(tmp_path): + link = tmp_path / "link" + link.symlink_to("/tmp") + ok, err = validate_cd_within_working_dir(f"cd {link}", str(tmp_path)) + assert ok is False + + +# --- is_dangerous_command (additional edge cases) --- + + +def test_safe_dangerous_token_as_argument(): + # "echo rm" — only the effective command ("echo") is checked, + # so "rm" as an argument does not trigger a false positive. + assert is_dangerous_command("echo rm") is False + + +def test_dangerous_substring_not_flagged(): + # "removal" contains "rm" as a substring but is NOT the token "rm" + assert is_dangerous_command("echo removal") is False + + +def test_dangerous_quoted_token(): + assert is_dangerous_command('"sudo" ls') is True + + +def test_dangerous_additional_tokens(): + # Spot-check tokens from each category that aren't tested above + assert is_dangerous_command("reboot") is True + assert is_dangerous_command("dd if=/dev/zero of=/dev/sda") is True + assert is_dangerous_command("crontab -e") is True + assert is_dangerous_command("useradd testuser") is True + assert is_dangerous_command("iptables -F") is True + + +def test_safe_common_commands(): + assert is_dangerous_command("cat file.txt") is False + assert is_dangerous_command("grep -r pattern .") is False + assert is_dangerous_command("python script.py") is False + assert is_dangerous_command("git status") is False + assert is_dangerous_command("npm install") is False + + +# --- split_compound_command (additional edge cases) --- + + +def test_split_compound_mixed_operators(): + result = split_compound_command("a && b || c; d | e") + assert len(result) == 5 + + +# --- extract_effective_command (additional edge cases) --- + + +def test_extract_env_multiple_vars(): + assert extract_effective_command("env A=1 B=2 C=3 rm -rf /") == "rm" + + +def test_extract_sh_c_wrapper(): + # sh -c behaves like bash -c + assert extract_effective_command('sh -c "rm -rf /"') is not None + + +def test_extract_exec_wrapper(): + assert extract_effective_command("exec sudo reboot") == "sudo" + + +# --- _strip_heredoc_bodies --- + + +def test_strip_heredoc_single_quoted(): + cmd = "python3 - <<'PY'\nimport json\nprint('hello')\nPY" + assert _strip_heredoc_bodies(cmd).strip() == "python3 -" + + +def test_strip_heredoc_double_quoted(): + cmd = 'cat <<"EOF"\nsome text\nEOF' + assert _strip_heredoc_bodies(cmd).strip() == "cat" + + +def test_strip_heredoc_unquoted(): + cmd = "cat < str: + return cmd + + @listen_toolkit(wrap_method=sync_base) + async def async_override(self, cmd: str) -> str: + return cmd + + # The wrapper itself should be a coroutine function + assert iscoroutinefunction(async_override) + + # inspect.unwrap should resolve to the async override, NOT the sync base + unwrapped = unwrap(async_override) + assert iscoroutinefunction(unwrapped), ( + "__wrapped__ should point to the async func, not the sync base. " + "Without the fix, Camel dispatches this on the wrong event loop." + ) + assert unwrapped is async_override or unwrapped is not sync_base + + +@pytest.mark.unit +def test_wrapped_unchanged_when_no_wrap_method(): + """When no wrap_method is provided (func == wrap), __wrapped__ should + follow the standard @wraps behavior — pointing back to func itself. + """ + + @listen_toolkit() + async def some_method(self) -> str: + return "ok" + + unwrapped = unwrap(some_method) + assert iscoroutinefunction(unwrapped) + + +@pytest.mark.unit +def test_wrapped_unchanged_for_sync_func_with_sync_wrap(): + """When both wrap_method and func are sync, __wrapped__ should be normal.""" + + def sync_base(self) -> str: + return "base" + + @listen_toolkit(wrap_method=sync_base) + def sync_override(self) -> str: + return "override" + + # sync wrapper should not have the async __wrapped__ override + assert not iscoroutinefunction(sync_override) + unwrapped = unwrap(sync_override) + assert not iscoroutinefunction(unwrapped) + + +@pytest.mark.unit +def test_wrapper_preserves_metadata_from_wrap_method(): + """Even with the __wrapped__ fix, the wrapper should preserve + the name/signature from wrap_method (the sync base), which is + what Camel's FunctionTool reads for parameter introspection. + """ + + def sync_base(self, command: str, timeout: int = 30) -> str: + """Execute a shell command.""" + return command + + @listen_toolkit(wrap_method=sync_base) + async def async_override(self, command: str, timeout: int = 30) -> str: + return command + + # Name should come from the sync base + assert async_override.__name__ == "sync_base" + # But unwrap should still resolve to async + assert iscoroutinefunction(unwrap(async_override)) + + +@pytest.mark.unit +@pytest.mark.asyncio +async def test_async_override_with_sync_wrap_executes_correctly(): + """The async override wrapped with a sync base should still + execute correctly as an async function. + """ + mock_toolkit = _create_mock_toolkit() + mock_task_lock = MagicMock() + mock_task_lock.put_queue = AsyncMock() + + def sync_base(self, value: int) -> int: + return value + + with patch( + "app.utils.listen.toolkit_listen.get_task_lock", + return_value=mock_task_lock, + ): + + @listen_toolkit(wrap_method=sync_base) + async def async_override(self, value: int) -> int: + await asyncio.sleep(0) # prove we're truly async + return value * 2 + + result = await async_override(mock_toolkit, 21) + assert result == 42 diff --git a/src/components/ChatBox/index.tsx b/src/components/ChatBox/index.tsx index f84992b2a..78c8cb870 100644 --- a/src/components/ChatBox/index.tsx +++ b/src/components/ChatBox/index.tsx @@ -23,7 +23,12 @@ import { import useChatStoreAdapter from '@/hooks/useChatStoreAdapter'; import { generateUniqueId, replayActiveTask } from '@/lib'; import { useAuthStore } from '@/store/authStore'; -import { AgentStep, ChatTaskStatus } from '@/types/constants'; +import { + AgentStep, + ApprovalAction, + ChatTaskStatus, + TERMINAL_APPROVAL_STORAGE_KEY, +} from '@/types/constants'; import { Square, SquareCheckBig, TriangleAlert } from 'lucide-react'; import { useCallback, useEffect, useMemo, useRef, useState } from 'react'; import { useTranslation } from 'react-i18next'; @@ -162,6 +167,7 @@ export default function ChatBox(): JSX.Element { const [loading, setLoading] = useState(false); const [isReplayLoading, setIsReplayLoading] = useState(false); + const [showFullCommand, setShowFullCommand] = useState(false); const [isPauseResumeLoading, setIsPauseResumeLoading] = useState(false); const handleSendRef = useRef< ((messageStr?: string, taskId?: string) => Promise) | null @@ -603,6 +609,11 @@ export default function ChatBox(): JSX.Element { question: tempMessageContent, task_id: nextTaskId, attaches: improveAttaches, + hitl_options: { + terminal_approval: + localStorage.getItem(TERMINAL_APPROVAL_STORAGE_KEY) === + 'true', + }, }); chatStore.setIsPending(_taskId, true); chatStore.addMessages(_taskId, { @@ -1009,6 +1020,135 @@ export default function ChatBox(): JSX.Element { onSkip={handleSkip} isPauseResumeLoading={isPauseResumeLoading} /> + {/* Dangerous command approval queue */} + {chatStore.activeTaskId && + (chatStore.tasks[chatStore.activeTaskId]?.approvalQueue?.length ?? + 0) > 0 && ( +
+
+
+ {t('chat.approval-prompt')} +
+ {chatStore.tasks[chatStore.activeTaskId].approvalQueue + .length > 1 && ( + + { + chatStore.tasks[chatStore.activeTaskId].approvalQueue + .length + }{' '} + {t('chat.approval-pending')} + + )} +
+ {(() => { + const current = + chatStore.tasks[chatStore.activeTaskId].approvalQueue[0]; + const cmd = current?.command || ''; + const isLong = cmd.length > 100; + return ( + + {!isLong || showFullCommand + ? cmd + : cmd.slice(0, 100) + '...'} + {isLong && ( + <> + {' '} + setShowFullCommand((v) => !v)} + > + {showFullCommand + ? t('chat.show-less') + : t('chat.show-more')} + + + )} + + ); + })()} +
+ + + +
+
+ )} {chatStore.activeTaskId && ( (null); const [userExpanded, setUserExpanded] = useState(false); - const isExpanded = userExpanded || value.length > 0; + const isExpanded = userExpanded || (value ?? '').length > 0; const expand = useCallback(() => { setUserExpanded(true); diff --git a/src/i18n/locales/ar/chat.json b/src/i18n/locales/ar/chat.json index 7cd3c5994..d2fc1c09d 100644 --- a/src/i18n/locales/ar/chat.json +++ b/src/i18n/locales/ar/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Eigent أهلاً بك في", "how-can-i-help-you": "كيف يمكنني مساعدتك اليوم؟", "ask-placeholder": "ما الذي تحتاج إلى تحقيقه اليوم؟", + "approval-prompt": "هل تريد الموافقة على هذا الأمر؟", + "approval-yes": "نعم", + "approval-all-yes": "موافقة تلقائية لهذه المهمة", + "approval-no": "لا", + "approval-pending": "في الانتظار", + "show-more": "عرض المزيد", + "show-less": "عرض أقل", "select-file": "اختر ملف", "all-files": "كل الملفات", "token": "رمز", diff --git a/src/i18n/locales/ar/setting.json b/src/i18n/locales/ar/setting.json index 05790d126..524e28714 100644 --- a/src/i18n/locales/ar/setting.json +++ b/src/i18n/locales/ar/setting.json @@ -1,6 +1,10 @@ { "settings": "إعدادات", "general": "عام", + "human-in-the-loop": "تدخل بشري", + "human-in-the-loop-title": "تدخل بشري", + "terminal-approval": "موافقة على أوامر الطرفية", + "terminal-approval-hint": "طلب موافقتك قبل تنفيذ أوامر طرفية قد تكون خطيرة (مثل rm وsudo وkill).", "privacy": "خصوصية", "models": "نماذج", "mcp": "MCP الأدوات", diff --git a/src/i18n/locales/de/chat.json b/src/i18n/locales/de/chat.json index 3eb535d43..8a66f6556 100644 --- a/src/i18n/locales/de/chat.json +++ b/src/i18n/locales/de/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Willkommen bei Eigent", "how-can-i-help-you": "Wie kann ich Ihnen heute helfen?", "ask-placeholder": "Was möchten Sie heute erreichen?", + "approval-prompt": "Diesen Terminalbefehl genehmigen?", + "approval-yes": "Ja", + "approval-all-yes": "Für diese Aufgabe automatisch genehmigen", + "approval-no": "Nein", + "approval-pending": "ausstehend", + "show-more": "Mehr anzeigen", + "show-less": "Weniger anzeigen", "select-file": "Datei auswählen", "all-files": "Alle Dateien", "token": "Token", diff --git a/src/i18n/locales/de/setting.json b/src/i18n/locales/de/setting.json index 25bacb0d3..0173cd112 100644 --- a/src/i18n/locales/de/setting.json +++ b/src/i18n/locales/de/setting.json @@ -1,6 +1,10 @@ { "settings": "Einstellungen", "general": "Allgemein", + "human-in-the-loop": "Kontrolle", + "human-in-the-loop-title": "Kontrolle", + "terminal-approval": "Terminalbefehle genehmigen", + "terminal-approval-hint": "Ihre Genehmigung einholen, bevor potenziell gefährliche Terminalbefehle ausgeführt werden (z. B. rm, sudo, kill).", "privacy": "Datenschutz", "models": "Modelle", "mcp": "MCP & Tools", diff --git a/src/i18n/locales/en-us/chat.json b/src/i18n/locales/en-us/chat.json index 444c1e495..31cccfc2a 100644 --- a/src/i18n/locales/en-us/chat.json +++ b/src/i18n/locales/en-us/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Welcome to Eigent", "how-can-i-help-you": "How can I help you today?", "ask-placeholder": "Ask Eigent to automate your tasks", + "approval-prompt": "Approve this terminal command?", + "approval-yes": "Yes", + "approval-all-yes": "Auto-approve for this task", + "approval-no": "No", + "approval-pending": "pending", + "show-more": "Show more", + "show-less": "Show less", "select-file": "Select File", "all-files": "All Files", "token": "Token", diff --git a/src/i18n/locales/en-us/setting.json b/src/i18n/locales/en-us/setting.json index 4402f7036..81334c494 100644 --- a/src/i18n/locales/en-us/setting.json +++ b/src/i18n/locales/en-us/setting.json @@ -1,6 +1,8 @@ { "settings": "Settings", "general": "General", + "human-in-the-loop": "HITL", + "human-in-the-loop-title": "Human in the Loop", "privacy": "Privacy", "models": "Models", "mcp": "MCP & Tools", @@ -19,6 +21,9 @@ "light": "Light", "transparent": "Transparent", + "terminal-approval": "Terminal Command Approval", + "terminal-approval-hint": "Require your approval before executing potentially dangerous terminal commands (e.g. rm, sudo, kill).", + "data-privacy": "Data Privacy", "data-privacy-description": "Eigent is built on a local-first principle to ensure your privacy. Your data remains on your device by default. Cloud features are optional and only use the minimum data necessary to function. For full details, visit our", "privacy-policy": "Privacy Policy", diff --git a/src/i18n/locales/es/chat.json b/src/i18n/locales/es/chat.json index 84fdaff41..3a770390c 100644 --- a/src/i18n/locales/es/chat.json +++ b/src/i18n/locales/es/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "¡Bienvenido a Eigent!", "how-can-i-help-you": "¿Cómo puedo ayudarte hoy?", "ask-placeholder": "¿Qué necesitas lograr hoy?", + "approval-prompt": "¿Aprobar este comando de terminal?", + "approval-yes": "Sí", + "approval-all-yes": "Aprobar automáticamente en esta tarea", + "approval-no": "No", + "approval-pending": "pendiente", + "show-more": "Mostrar más", + "show-less": "Mostrar menos", "select-file": "Seleccionar archivo", "all-files": "Todos los archivos", "token": "Token", diff --git a/src/i18n/locales/es/setting.json b/src/i18n/locales/es/setting.json index dc0cbb7b2..0111bcd25 100644 --- a/src/i18n/locales/es/setting.json +++ b/src/i18n/locales/es/setting.json @@ -1,6 +1,10 @@ { "settings": "Ajustes", "general": "Generales", + "human-in-the-loop": "Supervisión", + "human-in-the-loop-title": "Supervisión", + "terminal-approval": "Aprobación de comandos", + "terminal-approval-hint": "Solicitar aprobación antes de ejecutar comandos de terminal potencialmente peligrosos (p. ej., rm, sudo, kill).", "privacy": "Privacidad", "models": "Modelos", "mcp": "MCP & Herramientas", diff --git a/src/i18n/locales/fr/chat.json b/src/i18n/locales/fr/chat.json index 36ba0a8fb..1dcbab068 100644 --- a/src/i18n/locales/fr/chat.json +++ b/src/i18n/locales/fr/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Bienvenue chez Eigent", "how-can-i-help-you": "Comment puis-je vous aider aujourd'hui ?", "ask-placeholder": "Que devez-vous accomplir aujourd'hui ?", + "approval-prompt": "Approuver cette commande terminal ?", + "approval-yes": "Oui", + "approval-all-yes": "Approuver automatiquement pour cette tâche", + "approval-no": "Non", + "approval-pending": "en attente", + "show-more": "Afficher plus", + "show-less": "Afficher moins", "select-file": "Sélectionner un fichier", "all-files": "Tous les fichiers", "token": "Jeton", diff --git a/src/i18n/locales/fr/setting.json b/src/i18n/locales/fr/setting.json index d0f16529c..c7c345164 100644 --- a/src/i18n/locales/fr/setting.json +++ b/src/i18n/locales/fr/setting.json @@ -1,6 +1,10 @@ { "settings": "Réglages", "general": "General", + "human-in-the-loop": "Supervision", + "human-in-the-loop-title": "Supervision", + "terminal-approval": "Approbation des commandes", + "terminal-approval-hint": "Demander votre approbation avant d'exécuter des commandes de terminal potentiellement dangereuses (ex. rm, sudo, kill).", "privacy": "Privacy", "models": "Models", "mcp": "MCP & Tools", diff --git a/src/i18n/locales/it/chat.json b/src/i18n/locales/it/chat.json index 12ae56047..38ce013ec 100644 --- a/src/i18n/locales/it/chat.json +++ b/src/i18n/locales/it/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Benvenuto in Eigent", "how-can-i-help-you": "Come posso aiutarti oggi?", "ask-placeholder": "Cosa devi realizzare oggi?", + "approval-prompt": "Approvare questo comando del terminale?", + "approval-yes": "Sì", + "approval-all-yes": "Approva automaticamente per questa attività", + "approval-no": "No", + "approval-pending": "in attesa", + "show-more": "Mostra di più", + "show-less": "Mostra meno", "select-file": "Seleziona File", "all-files": "Tutti i File", "token": "Token", diff --git a/src/i18n/locales/it/setting.json b/src/i18n/locales/it/setting.json index 8352ea1cf..9ec13fa82 100644 --- a/src/i18n/locales/it/setting.json +++ b/src/i18n/locales/it/setting.json @@ -1,6 +1,10 @@ { "settings": "Impostazioni", "general": "Generale", + "human-in-the-loop": "Controllo", + "human-in-the-loop-title": "Controllo", + "terminal-approval": "Approvazione comandi", + "terminal-approval-hint": "Richiedere l'approvazione prima di eseguire comandi terminale potenzialmente pericolosi (es. rm, sudo, kill).", "privacy": "Privacy", "models": "Modelli", "mcp": "MCP e Strumenti", diff --git a/src/i18n/locales/ja/chat.json b/src/i18n/locales/ja/chat.json index 3d025b6ce..2d6c4bc28 100644 --- a/src/i18n/locales/ja/chat.json +++ b/src/i18n/locales/ja/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Eigentへようこそ", "how-can-i-help-you": "本日はどのようなご用件でしょうか?", "ask-placeholder": "本日は何を達成したいですか?", + "approval-prompt": "このターミナルコマンドを承認しますか?", + "approval-yes": "はい", + "approval-all-yes": "このタスクで自動承認", + "approval-no": "いいえ", + "approval-pending": "保留中", + "show-more": "もっと見る", + "show-less": "折りたたむ", "select-file": "ファイルを選択", "all-files": "すべてのファイル", "token": "トークン", diff --git a/src/i18n/locales/ja/setting.json b/src/i18n/locales/ja/setting.json index d726d0748..36df22a7d 100644 --- a/src/i18n/locales/ja/setting.json +++ b/src/i18n/locales/ja/setting.json @@ -1,6 +1,10 @@ { "settings": "設定", "general": "一般", + "human-in-the-loop": "人間参加型", + "human-in-the-loop-title": "人間参加型", + "terminal-approval": "ターミナルコマンド承認", + "terminal-approval-hint": "有効にすると、危険なターミナルコマンド(rm、sudo、kill など)の実行前に承認を求めます。", "privacy": "プライバシー", "models": "モデル", "mcp": "MCP & ツール", diff --git a/src/i18n/locales/ko/chat.json b/src/i18n/locales/ko/chat.json index 4b50320da..5ece99696 100644 --- a/src/i18n/locales/ko/chat.json +++ b/src/i18n/locales/ko/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Eigent에 오신 것을 환영합니다", "how-can-i-help-you": "무엇을 도와드릴까요?", "ask-placeholder": "오늘 무엇을 달성하고 싶으신가요?", + "approval-prompt": "이 터미널 명령을 승인하시겠습니까?", + "approval-yes": "예", + "approval-all-yes": "이 작업에서 자동 승인", + "approval-no": "아니요", + "approval-pending": "대기 중", + "show-more": "더 보기", + "show-less": "접기", "select-file": "파일 선택", "all-files": "모든 파일", "token": "토큰", diff --git a/src/i18n/locales/ko/setting.json b/src/i18n/locales/ko/setting.json index 698c8a65e..2cc681fd2 100644 --- a/src/i18n/locales/ko/setting.json +++ b/src/i18n/locales/ko/setting.json @@ -1,6 +1,10 @@ { "settings": "설정", "general": "일반", + "human-in-the-loop": "사람 개입", + "human-in-the-loop-title": "사람 개입", + "terminal-approval": "터미널 명령 승인", + "terminal-approval-hint": "활성화하면 위험한 터미널 명령(예: rm, sudo, kill) 실행 전 승인을 요청합니다.", "privacy": "개인정보", "models": "모델", "mcp": "MCP 및 도구", diff --git a/src/i18n/locales/ru/chat.json b/src/i18n/locales/ru/chat.json index 4eaae3a2b..0beaf26db 100644 --- a/src/i18n/locales/ru/chat.json +++ b/src/i18n/locales/ru/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "Добро пожаловать в Eigent", "how-can-i-help-you": "Чем я могу вам помочь сегодня?", "ask-placeholder": "Чего вы хотите достичь сегодня?", + "approval-prompt": "Одобрить эту команду терминала?", + "approval-yes": "Да", + "approval-all-yes": "Автоматически одобрять для этой задачи", + "approval-no": "Нет", + "approval-pending": "ожидает", + "show-more": "Показать больше", + "show-less": "Показать меньше", "select-file": "Выбрать файл", "all-files": "Все файлы", "token": "Токен", diff --git a/src/i18n/locales/ru/setting.json b/src/i18n/locales/ru/setting.json index b8988c0c8..c99b50c61 100644 --- a/src/i18n/locales/ru/setting.json +++ b/src/i18n/locales/ru/setting.json @@ -1,6 +1,10 @@ { "settings": "настройки", "general": "Общие", + "human-in-the-loop": "Контроль", + "human-in-the-loop-title": "Контроль", + "terminal-approval": "Одобрение команд терминала", + "terminal-approval-hint": "Запрашивать одобрение перед выполнением потенциально опасных терминальных команд (например, rm, sudo, kill).", "privacy": "Конфиденциальность", "models": "Модели", "mcp": "MCP и Инструменты", diff --git a/src/i18n/locales/zh-Hans/chat.json b/src/i18n/locales/zh-Hans/chat.json index a42c676f1..29049d0d9 100644 --- a/src/i18n/locales/zh-Hans/chat.json +++ b/src/i18n/locales/zh-Hans/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "欢迎使用 Eigent", "how-can-i-help-you": "今天有什么可以帮你的吗?", "ask-placeholder": "今天你需要完成什么任务?", + "approval-prompt": "是否批准此终端命令?", + "approval-yes": "是", + "approval-all-yes": "本任务自动批准", + "approval-no": "否", + "approval-pending": "待处理", + "show-more": "显示更多", + "show-less": "显示较少", "select-file": "选择文件", "all-files": "所有文件", "token": "Token", diff --git a/src/i18n/locales/zh-Hans/setting.json b/src/i18n/locales/zh-Hans/setting.json index 51f109ac3..4fa283920 100644 --- a/src/i18n/locales/zh-Hans/setting.json +++ b/src/i18n/locales/zh-Hans/setting.json @@ -1,6 +1,10 @@ { "settings": "设置", "general": "通用", + "human-in-the-loop": "人机协作", + "human-in-the-loop-title": "人机协作", + "terminal-approval": "终端命令审批", + "terminal-approval-hint": "启用后,执行危险的终端命令(如 rm、sudo、kill 等)前需要您的确认。", "privacy": "隐私", "models": "模型", "mcp": "MCP & 工具", diff --git a/src/i18n/locales/zh-Hant/chat.json b/src/i18n/locales/zh-Hant/chat.json index 1ae31eff4..3a48ade21 100644 --- a/src/i18n/locales/zh-Hant/chat.json +++ b/src/i18n/locales/zh-Hant/chat.json @@ -2,6 +2,13 @@ "welcome-to-eigent": "歡迎使用 Eigent", "how-can-i-help-you": "今天有什麼可以幫你的嗎?", "ask-placeholder": "今天你需要完成什麼任務?", + "approval-prompt": "是否批准此終端命令?", + "approval-yes": "是", + "approval-all-yes": "本任務自動批准", + "approval-no": "否", + "approval-pending": "待處理", + "show-more": "顯示更多", + "show-less": "顯示較少", "select-file": "選擇文件", "all-files": "所有文件", "token": "Token", diff --git a/src/i18n/locales/zh-Hant/setting.json b/src/i18n/locales/zh-Hant/setting.json index fbc6bf43b..9e3b45439 100644 --- a/src/i18n/locales/zh-Hant/setting.json +++ b/src/i18n/locales/zh-Hant/setting.json @@ -1,6 +1,10 @@ { "settings": "設定", "general": "通用", + "human-in-the-loop": "人機協作", + "human-in-the-loop-title": "人機協作", + "terminal-approval": "終端命令審批", + "terminal-approval-hint": "啟用後,執行危險的終端命令(如 rm、sudo、kill 等)前需要您的確認。", "privacy": "隱私", "models": "模型", "mcp": "MCP & 工具", diff --git a/src/pages/Setting/HumanInTheLoop.tsx b/src/pages/Setting/HumanInTheLoop.tsx new file mode 100644 index 000000000..7e74d181e --- /dev/null +++ b/src/pages/Setting/HumanInTheLoop.tsx @@ -0,0 +1,77 @@ +// ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. ========= + +import { Switch } from '@/components/ui/switch'; +import { TERMINAL_APPROVAL_STORAGE_KEY } from '@/types/constants'; +import { useState } from 'react'; +import { useTranslation } from 'react-i18next'; + +export default function SettingHumanInTheLoop() { + const { t } = useTranslation(); + + // Terminal Command Approval - disabled by default + const [terminalApproval, setTerminalApproval] = useState(() => { + try { + return localStorage.getItem(TERMINAL_APPROVAL_STORAGE_KEY) === 'true'; + } catch { + return false; + } + }); + + const handleTerminalApprovalChange = (checked: boolean) => { + setTerminalApproval(checked); + try { + localStorage.setItem(TERMINAL_APPROVAL_STORAGE_KEY, String(checked)); + } catch { + // ignore + } + }; + + return ( +
+ {/* Header Section */} +
+
+
+
+ {t('setting.human-in-the-loop-title')} +
+
+
+
+ + {/* Content Section */} +
+ {/* Terminal Command Approval */} +
+
+
+
+ {t('setting.terminal-approval')} +
+
+ {t('setting.terminal-approval-hint')} +
+
+ +
+
+
+
+ ); +} diff --git a/src/pages/Setting/index.tsx b/src/pages/Setting/index.tsx index 663b92824..187b0ce27 100644 --- a/src/pages/Setting/index.tsx +++ b/src/pages/Setting/index.tsx @@ -19,9 +19,10 @@ import VerticalNavigation, { } from '@/components/Navigation'; import useAppVersion from '@/hooks/use-app-version'; import General from '@/pages/Setting/General'; +import HumanInTheLoop from '@/pages/Setting/HumanInTheLoop'; import Privacy from '@/pages/Setting/Privacy'; import { useAuthStore } from '@/store/authStore'; -import { Fingerprint, Settings, TagIcon } from 'lucide-react'; +import { Fingerprint, Settings, ShieldCheck, TagIcon } from 'lucide-react'; import { useState } from 'react'; import { useTranslation } from 'react-i18next'; import { useLocation, useNavigate } from 'react-router-dom'; @@ -41,6 +42,12 @@ export default function Setting() { icon: Settings, path: '/setting/general', }, + { + id: 'human-in-the-loop', + name: t('setting.human-in-the-loop'), + icon: ShieldCheck, + path: '/setting/human-in-the-loop', + }, { id: 'privacy', name: t('setting.privacy'), @@ -122,6 +129,7 @@ export default function Setting() {
{activeTab === 'general' && } + {activeTab === 'human-in-the-loop' && } {activeTab === 'privacy' && }
diff --git a/src/store/chatStore.ts b/src/store/chatStore.ts index 45924a6ef..58fd0f4de 100644 --- a/src/store/chatStore.ts +++ b/src/store/chatStore.ts @@ -31,6 +31,7 @@ import { AgentStatusValue, AgentStep, ChatTaskStatus, + TERMINAL_APPROVAL_STORAGE_KEY, TaskStatus, type ChatTaskStatusType, } from '@/types/constants'; @@ -73,6 +74,8 @@ interface Task { isTakeControl: boolean; isTaskEdit: boolean; isContextExceeded?: boolean; + // Queue of pending command approvals (supports concurrent requests) + approvalQueue: { command: string; agent: string; approvalId: string }[]; // Streaming decompose text - stored separately to avoid frequent re-renders streamingDecomposeText: string; } @@ -113,6 +116,12 @@ export interface ChatStore { setTaskRunning: (taskId: string, taskRunning: TaskInfo[]) => void; setActiveAsk: (taskId: string, agentName: string) => void; setActiveAskList: (taskId: string, message: Message[]) => void; + pushApproval: ( + taskId: string, + item: { command: string; agent: string; approvalId: string } + ) => void; + shiftApproval: (taskId: string) => void; + clearAllApprovals: (taskId: string) => void; addWebViewUrl: ( taskId: string, webViewUrl: string, @@ -282,6 +291,7 @@ const chatStore = (initial?: Partial) => snapshotsTemp: [], isTakeControl: false, isTaskEdit: false, + approvalQueue: [], streamingDecomposeText: '', }, }, @@ -721,6 +731,18 @@ const chatStore = (initial?: Partial) => installed_mcp: { mcpServers: {} }, language: systemLanguage, allow_local_system: true, + hitl_options: { + terminal_approval: (() => { + try { + return ( + localStorage.getItem(TERMINAL_APPROVAL_STORAGE_KEY) === + 'true' + ); + } catch { + return false; + } + })(), + }, attaches: ( messageAttaches || targetChatStore.getState().tasks[newTaskId]?.attaches || @@ -952,6 +974,9 @@ const chatStore = (initial?: Partial) => addFileList, setActiveAsk, setActiveAskList, + pushApproval, + shiftApproval, + clearAllApprovals, tasks, create: _create, setTaskTime, @@ -1821,6 +1846,15 @@ const chatStore = (initial?: Partial) => ); return; } + // Terminal command approval - no 30s auto-skip + if (agentMessages.step === AgentStep.COMMAND_APPROVAL) { + pushApproval(currentTaskId, { + command: agentMessages.data?.command ?? '', + agent: agentMessages.data?.agent ?? '', + approvalId: agentMessages.data?.approval_id ?? '', + }); + return; + } // Write File if (agentMessages.step === AgentStep.WRITE_FILE) { console.log('write_to_file', agentMessages.data); @@ -2235,6 +2269,9 @@ const chatStore = (initial?: Partial) => setIsPending(currentTaskId, false); setStatus(currentTaskId, ChatTaskStatus.FINISHED); + // Clear any pending approval prompts so they don't + // persist when the user re-enters the project. + clearAllApprovals(currentTaskId); // completed tasks move to history setUpdateCount(); @@ -2791,6 +2828,48 @@ const chatStore = (initial?: Partial) => }, })); }, + pushApproval( + taskId: string, + item: { command: string; agent: string; approvalId: string } + ) { + set((state) => ({ + ...state, + tasks: { + ...state.tasks, + [taskId]: { + ...state.tasks[taskId], + approvalQueue: [ + ...(state.tasks[taskId]?.approvalQueue ?? []), + item, + ], + }, + }, + })); + }, + shiftApproval(taskId: string) { + set((state) => ({ + ...state, + tasks: { + ...state.tasks, + [taskId]: { + ...state.tasks[taskId], + approvalQueue: (state.tasks[taskId]?.approvalQueue ?? []).slice(1), + }, + }, + })); + }, + clearAllApprovals(taskId: string) { + set((state) => ({ + ...state, + tasks: { + ...state.tasks, + [taskId]: { + ...state.tasks[taskId], + approvalQueue: [], + }, + }, + })); + }, setProgressValue(taskId: string, progressValue: number) { set((state) => ({ ...state, diff --git a/src/types/constants.ts b/src/types/constants.ts index eba956f65..5226d6099 100644 --- a/src/types/constants.ts +++ b/src/types/constants.ts @@ -38,6 +38,7 @@ export const AgentStep = { REMOVE_TASK: 'remove_task', NOTICE: 'notice', ASK: 'ask', + COMMAND_APPROVAL: 'command_approval', SYNC: 'sync', NOTICE_CARD: 'notice_card', FAILED: 'failed', @@ -46,6 +47,19 @@ export const AgentStep = { export type AgentStepType = (typeof AgentStep)[keyof typeof AgentStep]; +/** + * User responses for terminal command approval. + * Mirrors backend ApprovalAction enum in app/model/enums.py. + */ +export const ApprovalAction = { + APPROVE_ONCE: 'approve_once', + AUTO_APPROVE: 'auto_approve', + REJECT: 'reject', +} as const; + +export type ApprovalActionType = + (typeof ApprovalAction)[keyof typeof ApprovalAction]; + /** * Status values on AgentMessage.status (SSE message lifecycle). */ @@ -98,3 +112,8 @@ export const AgentStatusValue = { export type AgentStatusType = (typeof AgentStatusValue)[keyof typeof AgentStatusValue]; + +/** + * localStorage key for the HITL terminal approval toggle. + */ +export const TERMINAL_APPROVAL_STORAGE_KEY = 'eigent_terminal_approval';