diff --git a/.gitignore b/.gitignore index 6fc0934a021d..7480334ba7c0 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,7 @@ share/python-wheels/ *.egg MANIFEST requirements.txt - +temp/ # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. diff --git a/evaluation/benchmarks/swe_bench/run_infer.py b/evaluation/benchmarks/swe_bench/run_infer.py index 2410db6e579e..37777d29f644 100644 --- a/evaluation/benchmarks/swe_bench/run_infer.py +++ b/evaluation/benchmarks/swe_bench/run_infer.py @@ -120,27 +120,37 @@ def get_instruction(instance: pd.Series, metadata: EvalMetadata) -> MessageActio mode = metadata.details['mode'] llm_model = metadata.llm_config.model - # Determine the template file based on mode and LLM - if metadata.instruction_template_name: - template_name = metadata.instruction_template_name - elif mode.startswith('swt'): - template_name = 'swt.j2' - elif mode == 'swe': - if 'gpt-4.1' in llm_model: - template_name = 'swe_gpt4.j2' - else: - template_name = ( - 'swe_default.j2' # Default for 'swe' mode (regular swe-bench) - ) + # Check for custom instruction template path (absolute path takes precedence) + custom_instruction_template_path = metadata.details.get('instruction_template_path') + + if custom_instruction_template_path and os.path.isfile(custom_instruction_template_path): + # Use custom instruction template from provided path + prompts_dir = os.path.dirname(custom_instruction_template_path) + template_name = os.path.basename(custom_instruction_template_path) + logger.info(f'Using custom instruction template: {custom_instruction_template_path}') else: - # Fallback or error handling if mode is unexpected - logger.error(f'Unexpected evaluation mode: {mode}. 
Falling back to default.') - template_name = 'swe_default.j2' + # Determine the template file based on mode and LLM + if metadata.instruction_template_name: + template_name = metadata.instruction_template_name + elif mode.startswith('swt'): + template_name = 'swt.j2' + elif mode == 'swe': + if 'gpt-4.1' in llm_model: + template_name = 'swe_gpt4.j2' + else: + template_name = ( + 'swe_default.j2' # Default for 'swe' mode (regular swe-bench) + ) + else: + # Fallback or error handling if mode is unexpected + logger.error(f'Unexpected evaluation mode: {mode}. Falling back to default.') + template_name = 'swe_default.j2' + + # Default prompts directory + prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts') logger.debug(f'Using instruction template file: {template_name}') # Set up Jinja2 environment - # Assuming templates are in 'evaluation/benchmarks/swe_bench/prompts' relative to this script - prompts_dir = os.path.join(os.path.dirname(__file__), 'prompts') env = Environment(loader=FileSystemLoader(prompts_dir)) template = env.get_template(template_name) @@ -214,6 +224,9 @@ def get_instance_docker_image( return (docker_image_prefix.rstrip('/') + '/' + image_name).lower() + + + def get_config( instance: pd.Series, metadata: EvalMetadata, @@ -273,6 +286,8 @@ def get_config( system_prompt_filename=metadata.agent_config.system_prompt_filename if metadata.agent_config else 'system_prompt.j2', + system_prompt_path=SYSTEM_PROMPT_PATH, + system_prompt_long_horizon_path=SYSTEM_PROMPT_LONG_HORIZON_PATH, ) config.set_agent_config(agent_config) @@ -867,6 +882,13 @@ def filter_dataset( if __name__ == '__main__': + # Declare globals at the start of the block + global SYSTEM_PROMPT_PATH, SYSTEM_PROMPT_LONG_HORIZON_PATH + + # Module-level variables to store custom prompt paths + SYSTEM_PROMPT_PATH = None + SYSTEM_PROMPT_LONG_HORIZON_PATH = None + parser = get_evaluation_parser() parser.add_argument( '--dataset', @@ -899,6 +921,24 @@ def filter_dataset( default=None, 
help='Path to a JSON file containing instance data to use instead of loading from HuggingFace (e.g., \'{"instance_id": "...", "repo": "...", ...}\')', ) + parser.add_argument( + '--instruction-template-path', + type=str, + default=None, + help='Path to a custom instruction template file (overrides swe_default.j2)', + ) + parser.add_argument( + '--system-prompt-path', + type=str, + default=None, + help='Path to a custom system_prompt.j2 file', + ) + parser.add_argument( + '--system-prompt-long-horizon-path', + type=str, + default=None, + help='Path to a custom system_prompt_long_horizon.j2 file', + ) args, _ = parser.parse_known_args() @@ -996,7 +1036,27 @@ def filter_dataset( if args.agent_config: agent_config = get_agent_config_arg(args.agent_config, args.config_file) + # Set up custom system prompt paths if provided + if args.system_prompt_path: + if os.path.isfile(args.system_prompt_path): + SYSTEM_PROMPT_PATH = args.system_prompt_path + logger.info(f'Using custom system_prompt.j2: {SYSTEM_PROMPT_PATH}') + else: + raise ValueError(f'System prompt file does not exist: {args.system_prompt_path}') + + if args.system_prompt_long_horizon_path: + if os.path.isfile(args.system_prompt_long_horizon_path): + SYSTEM_PROMPT_LONG_HORIZON_PATH = args.system_prompt_long_horizon_path + logger.info(f'Using custom system_prompt_long_horizon.j2: {SYSTEM_PROMPT_LONG_HORIZON_PATH}') + else: + raise ValueError(f'System prompt long horizon file does not exist: {args.system_prompt_long_horizon_path}') + + # Build details dict with custom prompt paths details = {'mode': args.mode} + if args.instruction_template_path: + details['instruction_template_path'] = args.instruction_template_path + logger.info(f'Custom instruction template path: {args.instruction_template_path}') + _agent_cls = openhands.agenthub.Agent.get_cls(args.agent_cls) dataset_description = ( diff --git a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh index 
44862bf73081..203daab01ca4 100755 --- a/evaluation/benchmarks/swe_bench/scripts/run_infer.sh +++ b/evaluation/benchmarks/swe_bench/scripts/run_infer.sh @@ -64,8 +64,12 @@ EVAL_OUTPUT_DIR=${9} SELECTED_ID=${10} INSTANCE_DICT_PATH=${11} CONFIG_FILE=${12} -N_RUNS=${13} -MODE=${14} +INSTRUCTION_TEMPLATE_PATH=${13} +SYSTEM_PROMPT_PATH=${14} +SYSTEM_PROMPT_LONG_HORIZON_PATH=${15} +N_RUNS=${16} +MODE=${17} + if [ -z "$NUM_WORKERS" ]; then NUM_WORKERS=1 @@ -133,6 +137,9 @@ echo "EVAL_CONDENSER: $EVAL_CONDENSER" echo "EVAL_OUTPUT_DIR: $EVAL_OUTPUT_DIR" echo "SELECTED_ID: $SELECTED_ID" echo "INSTANCE_DICT_PATH: $INSTANCE_DICT_PATH" +echo "INSTRUCTION_TEMPLATE_PATH: $INSTRUCTION_TEMPLATE_PATH" +echo "SYSTEM_PROMPT_PATH: $SYSTEM_PROMPT_PATH" +echo "SYSTEM_PROMPT_LONG_HORIZON_PATH: $SYSTEM_PROMPT_LONG_HORIZON_PATH" echo "TMUX_MEMORY_LIMIT: $TMUX_MEMORY_LIMIT" echo "COMMAND_EXEC_TIMEOUT: $COMMAND_EXEC_TIMEOUT" @@ -199,6 +206,21 @@ function run_eval() { COMMAND="$COMMAND --config-file $CONFIG_FILE" fi + if [ -n "$INSTRUCTION_TEMPLATE_PATH" ]; then + echo "INSTRUCTION_TEMPLATE_PATH: $INSTRUCTION_TEMPLATE_PATH" + COMMAND="$COMMAND --instruction-template-path $INSTRUCTION_TEMPLATE_PATH" + fi + + if [ -n "$SYSTEM_PROMPT_PATH" ]; then + echo "SYSTEM_PROMPT_PATH: $SYSTEM_PROMPT_PATH" + COMMAND="$COMMAND --system-prompt-path $SYSTEM_PROMPT_PATH" + fi + + if [ -n "$SYSTEM_PROMPT_LONG_HORIZON_PATH" ]; then + echo "SYSTEM_PROMPT_LONG_HORIZON_PATH: $SYSTEM_PROMPT_LONG_HORIZON_PATH" + COMMAND="$COMMAND --system-prompt-long-horizon-path $SYSTEM_PROMPT_LONG_HORIZON_PATH" + fi + # Run the command eval $COMMAND } diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py index c6a6c9247954..322c14a0f5e5 100644 --- a/openhands/agenthub/codeact_agent/codeact_agent.py +++ b/openhands/agenthub/codeact_agent/codeact_agent.py @@ -17,16 +17,19 @@ from openhands.agenthub.codeact_agent.tools.condensation_request import ( CondensationRequestTool, ) 
+from openhands.agenthub.codeact_agent.tools.edit import EditTool from openhands.agenthub.codeact_agent.tools.finish import FinishTool +from openhands.agenthub.codeact_agent.tools.glob import GlobTool +from openhands.agenthub.codeact_agent.tools.grep import GrepTool from openhands.agenthub.codeact_agent.tools.ipython import IPythonTool +from openhands.agenthub.codeact_agent.tools.list_dir import ListDirTool from openhands.agenthub.codeact_agent.tools.llm_based_edit import LLMBasedFileEditTool -from openhands.agenthub.codeact_agent.tools.str_replace_editor import ( - create_str_replace_editor_tool, -) +from openhands.agenthub.codeact_agent.tools.read import ReadTool from openhands.agenthub.codeact_agent.tools.task_tracker import ( create_task_tracker_tool, ) from openhands.agenthub.codeact_agent.tools.think import ThinkTool +from openhands.agenthub.codeact_agent.tools.write import WriteTool from openhands.controller.agent import Agent from openhands.controller.state.state import State from openhands.core.config import AgentConfig @@ -46,7 +49,7 @@ class CodeActAgent(Agent): - VERSION = '2.2' + VERSION = "2.2" """ The Code Act Agent is a minimalist agent. The agent works by passing the model a list of action-observation pairs and prompting the model to take the next step. 
@@ -77,7 +80,7 @@ def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None: - config (AgentConfig): The configuration for this agent """ super().__init__(config, llm_registry) - self.pending_actions: deque['Action'] = deque() + self.pending_actions: deque["Action"] = deque() self.reset() self.tools = self._get_tools() @@ -85,7 +88,7 @@ def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None: self.conversation_memory = ConversationMemory(self.config, self.prompt_manager) self.condenser = Condenser.from_config(self.config.condenser, llm_registry) - logger.debug(f'Using condenser: {type(self.condenser)}') + logger.debug(f"Using condenser: {type(self.condenser)}") # Override with router if needed self.llm = self.llm_registry.get_router(self.config) @@ -93,17 +96,32 @@ def __init__(self, config: AgentConfig, llm_registry: LLMRegistry) -> None: @property def prompt_manager(self) -> PromptManager: if self._prompt_manager is None: + # Use custom prompt directory if configured, otherwise use default + prompt_dir = ( + self.config.custom_prompt_dir + if self.config.custom_prompt_dir + else os.path.join(os.path.dirname(__file__), "prompts") + ) + + # Build template overrides from custom paths + template_overrides = {} + if self.config.system_prompt_path: + template_overrides['system_prompt.j2'] = self.config.system_prompt_path + if self.config.system_prompt_long_horizon_path: + template_overrides['system_prompt_long_horizon.j2'] = self.config.system_prompt_long_horizon_path + self._prompt_manager = PromptManager( - prompt_dir=os.path.join(os.path.dirname(__file__), 'prompts'), + prompt_dir=prompt_dir, system_prompt_filename=self.config.resolved_system_prompt_filename, + template_overrides=template_overrides if template_overrides else None, ) return self._prompt_manager - def _get_tools(self) -> list['ChatCompletionToolParam']: + def _get_tools(self) -> list["ChatCompletionToolParam"]: # For these models, we use short tool descriptions ( < 
1024 tokens) # to avoid hitting the OpenAI token limit for tool descriptions. - SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ['gpt-4', 'o3', 'o1', 'o4'] + SHORT_TOOL_DESCRIPTION_LLM_SUBSTRS = ["gpt-4", "o3", "o1", "o4"] use_short_tool_desc = False if self.llm is not None: @@ -118,6 +136,15 @@ def _get_tools(self) -> list['ChatCompletionToolParam']: ) tools = [] + # Enable OpenCode-style tools by default + tools.append(ReadTool) + tools.append(WriteTool) + tools.append(EditTool) + # Add file search tools + tools.append(GlobTool) + tools.append(GrepTool) + tools.append(ListDirTool) + if self.config.enable_cmd: tools.append(create_cmd_run_tool(use_short_description=use_short_tool_desc)) if self.config.enable_think: @@ -127,8 +154,8 @@ def _get_tools(self) -> list['ChatCompletionToolParam']: if self.config.enable_condensation_request: tools.append(CondensationRequestTool) if self.config.enable_browsing: - if sys.platform == 'win32': - logger.warning('Windows runtime does not support browsing yet') + if sys.platform == "win32": + logger.warning("Windows runtime does not support browsing yet") else: tools.append(BrowserTool) if self.config.enable_jupyter: @@ -136,15 +163,7 @@ def _get_tools(self) -> list['ChatCompletionToolParam']: if self.config.enable_plan_mode: # In plan mode, we use the task_tracker tool for task management tools.append(create_task_tracker_tool(use_short_tool_desc)) - if self.config.enable_llm_editor: - tools.append(LLMBasedFileEditTool) - elif self.config.enable_editor: - tools.append( - create_str_replace_editor_tool( - use_short_description=use_short_tool_desc, - runtime_type=self.config.runtime, - ) - ) + return tools def reset(self) -> None: @@ -153,7 +172,7 @@ def reset(self) -> None: # Only clear pending actions, not LLM metrics self.pending_actions.clear() - def step(self, state: State) -> 'Action': + def step(self, state: State) -> "Action": """Performs one step using the CodeAct Agent. 
This includes gathering info on previous steps and prompting the model to make a command to execute. @@ -181,7 +200,7 @@ def step(self, state: State) -> 'Action': # if we're done, go back latest_user_message = state.get_last_user_message() - if latest_user_message and latest_user_message.content.strip() == '/exit': + if latest_user_message and latest_user_message.content.strip() == "/exit": return AgentFinishAction() # Condense the events from the state. If we get a view we'll pass those @@ -197,24 +216,24 @@ def step(self, state: State) -> 'Action': return condensation_action logger.debug( - f'Processing {len(condensed_history)} events from a total of {len(state.history)} events' + f"Processing {len(condensed_history)} events from a total of {len(state.history)} events" ) initial_user_message = self._get_initial_user_message(state.history) messages = self._get_messages(condensed_history, initial_user_message) params: dict = { - 'messages': messages, + "messages": messages, } - params['tools'] = check_tools(self.tools, self.llm.config) - params['extra_body'] = { - 'metadata': state.to_llm_metadata( + params["tools"] = check_tools(self.tools, self.llm.config) + params["extra_body"] = { + "metadata": state.to_llm_metadata( model_name=self.llm.config.model, agent_name=self.name ) } response = self.llm.completion(**params) - logger.debug(f'Response from LLM: {response}') + logger.debug(f"Response from LLM: {response}") actions = self.response_to_actions(response) - logger.debug(f'Actions after response_to_actions: {actions}') + logger.debug(f"Actions after response_to_actions: {actions}") for action in actions: self.pending_actions.append(action) return self.pending_actions.popleft() @@ -223,19 +242,19 @@ def _get_initial_user_message(self, history: list[Event]) -> MessageAction: """Finds the initial user message action from the full history.""" initial_user_message: MessageAction | None = None for event in history: - if isinstance(event, MessageAction) and 
event.source == 'user': + if isinstance(event, MessageAction) and event.source == "user": initial_user_message = event break if initial_user_message is None: # This should not happen in a valid conversation logger.error( - f'CRITICAL: Could not find the initial user MessageAction in the full {len(history)} events history.' + f"CRITICAL: Could not find the initial user MessageAction in the full {len(history)} events history." ) # Depending on desired robustness, could raise error or create a dummy action # and log the error raise ValueError( - 'Initial user message not found in history. Please report this issue.' + "Initial user message not found in history. Please report this issue." ) return initial_user_message @@ -273,7 +292,7 @@ def _get_messages( - For Anthropic models, specific messages are cached according to their documentation """ if not self.prompt_manager: - raise Exception('Prompt Manager not instantiated.') + raise Exception("Prompt Manager not instantiated.") # Use ConversationMemory to process events (including SystemMessageAction) messages = self.conversation_memory.process_events( @@ -288,7 +307,7 @@ def _get_messages( return messages - def response_to_actions(self, response: 'ModelResponse') -> list['Action']: + def response_to_actions(self, response: "ModelResponse") -> list["Action"]: return codeact_function_calling.response_to_actions( response, mcp_tool_names=list(self.mcp_tools.keys()), diff --git a/openhands/agenthub/codeact_agent/function_calling.py b/openhands/agenthub/codeact_agent/function_calling.py index 8dc0f98d1d35..7a9513bff973 100644 --- a/openhands/agenthub/codeact_agent/function_calling.py +++ b/openhands/agenthub/codeact_agent/function_calling.py @@ -13,12 +13,17 @@ from openhands.agenthub.codeact_agent.tools import ( BrowserTool, CondensationRequestTool, + EditTool, FinishTool, + GlobTool, + GrepTool, IPythonTool, + ListDirTool, LLMBasedFileEditTool, + ReadTool, ThinkTool, + WriteTool, create_cmd_run_tool, - 
create_str_replace_editor_tool, ) from openhands.agenthub.codeact_agent.tools.security_utils import RISK_LEVELS from openhands.core.exceptions import ( @@ -37,8 +42,14 @@ CmdRunAction, FileEditAction, FileReadAction, + FileWriteAction, + GlobAction, + GrepAction, IPythonRunCellAction, + ListDirAction, MessageAction, + OpenCodeReadAction, + OpenCodeWriteAction, TaskTrackingAction, ValidationFailureAction, ) @@ -46,14 +57,22 @@ from openhands.events.action.mcp import MCPAction from openhands.events.event import FileEditSource, FileReadSource from openhands.events.tool import ToolCallMetadata -from openhands.llm.tool_names import TASK_TRACKER_TOOL_NAME +from openhands.llm.tool_names import ( + EDIT_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LIST_DIR_TOOL_NAME, + READ_TOOL_NAME, + TASK_TRACKER_TOOL_NAME, + WRITE_TOOL_NAME, +) def combine_thought(action: Action, thought: str) -> Action: - if not hasattr(action, 'thought'): + if not hasattr(action, "thought"): return action if thought and action.thought: - action.thought = f'{thought}\n{action.thought}' + action.thought = f"{thought}\n{action.thought}" elif thought: action.thought = thought return action @@ -63,42 +82,42 @@ def set_security_risk(action: Action, arguments: dict) -> None: """Set the security risk level for the action.""" # Set security_risk attribute if provided - if 'security_risk' in arguments: - if arguments['security_risk'] in RISK_LEVELS: - if hasattr(action, 'security_risk'): + if "security_risk" in arguments: + if arguments["security_risk"] in RISK_LEVELS: + if hasattr(action, "security_risk"): action.security_risk = getattr( - ActionSecurityRisk, arguments['security_risk'] + ActionSecurityRisk, arguments["security_risk"] ) else: - logger.warning(f'Invalid security_risk value: {arguments["security_risk"]}') + logger.warning(f"Invalid security_risk value: {arguments['security_risk']}") def response_to_actions( response: ModelResponse, mcp_tool_names: list[str] | None = None ) -> list[Action]: 
actions: list[Action] = [] - assert len(response.choices) == 1, 'Only one choice is supported for now' + assert len(response.choices) == 1, "Only one choice is supported for now" choice = response.choices[0] assistant_msg = choice.message # Check if both content and tool_calls are None - this indicates context length has been hit has_content = assistant_msg.content is not None - has_tool_calls = hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls + has_tool_calls = hasattr(assistant_msg, "tool_calls") and assistant_msg.tool_calls if not has_content and not has_tool_calls: raise LLMContextWindowExceedError( - 'LLM returned empty response with no content and no tool calls. This indicates the context length limit has been exceeded.' + "LLM returned empty response with no content and no tool calls. This indicates the context length limit has been exceeded." ) - if hasattr(assistant_msg, 'tool_calls') and assistant_msg.tool_calls: + if hasattr(assistant_msg, "tool_calls") and assistant_msg.tool_calls: # Check if there's assistant_msg.content. 
If so, add it to the thought - thought = '' + thought = "" if isinstance(assistant_msg.content, str): thought = assistant_msg.content elif isinstance(assistant_msg.content, list): for msg in assistant_msg.content: - if msg['type'] == 'text': - thought += msg['text'] + if msg["type"] == "text": + thought += msg["text"] # Process each tool call to OpenHands action for i, tool_call in enumerate(assistant_msg.tool_calls): @@ -310,6 +329,97 @@ def response_to_actions( command=arguments['command'], task_list=normalized_task_list, ) + + # ================================================ + # ReadTool (OpenCode-style file reading) + # ================================================ + elif tool_call.function.name == ReadTool["function"]["name"]: + if "file_path" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "file_path" in tool call {tool_call.function.name}' + ) + action = OpenCodeReadAction( + path=arguments["file_path"], + offset=arguments.get("offset", 0), + limit=arguments.get("limit", 2000), + ) + + # ================================================ + # WriteTool (OpenCode-style file writing with LSP diagnostics) + # ================================================ + elif tool_call.function.name == WriteTool["function"]["name"]: + if "file_path" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "file_path" in tool call {tool_call.function.name}' + ) + if "content" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "content" in tool call {tool_call.function.name}' + ) + action = OpenCodeWriteAction( + path=arguments["file_path"], + content=arguments["content"], + ) + + # ================================================ + # EditTool (OpenCode-style string replacement) + # ================================================ + elif tool_call.function.name == EditTool["function"]["name"]: + if "file_path" not in arguments: + raise FunctionCallValidationError( + f'Missing 
required argument "file_path" in tool call {tool_call.function.name}' + ) + if "old_string" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "old_string" in tool call {tool_call.function.name}' + ) + if "new_string" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "new_string" in tool call {tool_call.function.name}' + ) + action = FileEditAction( + path=arguments["file_path"], + command="str_replace", + old_str=arguments["old_string"], + new_str=arguments["new_string"], + impl_source=FileEditSource.OH_ACI, + ) + + # ================================================ + # GlobTool (File pattern search, respects gitignore, sorted by mtime) + # ================================================ + elif tool_call.function.name == GlobTool["function"]["name"]: + if "pattern" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "pattern" in tool call {tool_call.function.name}' + ) + action = GlobAction( + pattern=arguments["pattern"], + path=arguments.get("path", "."), + ) + + # ================================================ + # GrepTool (Content search, respects gitignore) + # ================================================ + elif tool_call.function.name == GrepTool["function"]["name"]: + if "pattern" not in arguments: + raise FunctionCallValidationError( + f'Missing required argument "pattern" in tool call {tool_call.function.name}' + ) + action = GrepAction( + pattern=arguments["pattern"], + path=arguments.get("path", "."), + include=arguments.get("include", ""), + ) + + # ================================================ + # ListDirTool (Directory listing with tree structure, respects gitignore) + # ================================================ + elif tool_call.function.name == ListDirTool["function"]["name"]: + action = ListDirAction( + path=arguments.get("path", "."), + ignore=arguments.get("ignore", []), + ) # ================================================ # 
MCPAction (MCP) @@ -345,7 +455,7 @@ def response_to_actions( actions.append(action) else: message_action = MessageAction( - content=str(assistant_msg.content) if assistant_msg.content else '', + content=str(assistant_msg.content) if assistant_msg.content else "", wait_for_response=True, ) # Add metadata for non-tool-call messages to preserve token IDs and logprobs diff --git a/openhands/agenthub/codeact_agent/tools/__init__.py b/openhands/agenthub/codeact_agent/tools/__init__.py index 41e190db2787..39dc08e31d1b 100644 --- a/openhands/agenthub/codeact_agent/tools/__init__.py +++ b/openhands/agenthub/codeact_agent/tools/__init__.py @@ -1,19 +1,45 @@ from .bash import create_cmd_run_tool from .browser import BrowserTool from .condensation_request import CondensationRequestTool +from .edit import EditTool from .finish import FinishTool +from .glob import GlobTool +from .grep import GrepTool from .ipython import IPythonTool +from .list_dir import ListDirTool from .llm_based_edit import LLMBasedFileEditTool -from .str_replace_editor import create_str_replace_editor_tool +from .opencode_editor import OpenCodeEditor +from .opencode_impl import ( + edit_file_opencode, + glob_files_opencode, + list_dir_opencode, + read_file_opencode, + replace_with_fuzzy_matching, + write_file_opencode, +) +from .read import ReadTool from .think import ThinkTool +from .write import WriteTool __all__ = [ - 'BrowserTool', - 'CondensationRequestTool', - 'create_cmd_run_tool', - 'FinishTool', - 'IPythonTool', - 'LLMBasedFileEditTool', - 'create_str_replace_editor_tool', - 'ThinkTool', + "BrowserTool", + "CondensationRequestTool", + "create_cmd_run_tool", + "edit_file_opencode", + "EditTool", + "FinishTool", + "glob_files_opencode", + "GlobTool", + "GrepTool", + "IPythonTool", + "list_dir_opencode", + "ListDirTool", + "LLMBasedFileEditTool", + "OpenCodeEditor", + "read_file_opencode", + "ReadTool", + "replace_with_fuzzy_matching", + "ThinkTool", + "write_file_opencode", + "WriteTool", ] diff 
--git a/openhands/agenthub/codeact_agent/tools/edit.py b/openhands/agenthub/codeact_agent/tools/edit.py new file mode 100644 index 000000000000..e9e7d99e9229 --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/edit.py @@ -0,0 +1,68 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import EDIT_TOOL_NAME + +_EDIT_DESCRIPTION = """Performs string replacement editing on a file. + +Usage: +- Replaces occurrences of old_string with new_string in the specified file +- The old_string must match exactly (including whitespace and indentation) +- By default, only replaces the first unique occurrence +- Use replace_all=true to replace all occurrences + +CRITICAL REQUIREMENTS: +1. EXACT MATCHING: old_string must match EXACTLY, including all whitespace and indentation +2. UNIQUENESS: old_string should uniquely identify the location to edit + - Include 3-5 lines of context before and after the change point + - If old_string matches multiple locations, the edit will fail (unless using replace_all) +3. 
DIFFERENCE: old_string and new_string must be different + +Parameters: +- file_path: The absolute path to the file to modify +- old_string: The exact text to replace (must match file content exactly) +- new_string: The text to replace it with (must be different from old_string) +- replace_all: Optional boolean to replace all occurrences (default: false) + +Examples: +- Simple replacement: + file_path="/workspace/main.py" + old_string="def hello():\\n print('hi')" + new_string="def hello():\\n print('hello world')" + +- Replace all occurrences: + file_path="/workspace/config.py" + old_string="DEBUG = True" + new_string="DEBUG = False" + replace_all=true +""" + +EditTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=EDIT_TOOL_NAME, + description=_EDIT_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['file_path', 'old_string', 'new_string'], + 'properties': { + 'file_path': { + 'type': 'string', + 'description': 'The absolute path to the file to modify', + }, + 'old_string': { + 'type': 'string', + 'description': 'The exact text to replace (must match file content exactly)', + }, + 'new_string': { + 'type': 'string', + 'description': 'The text to replace it with (must be different from old_string)', + }, + 'replace_all': { + 'type': 'boolean', + 'description': 'Replace all occurrences (default: false)', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/codeact_agent/tools/glob.py b/openhands/agenthub/codeact_agent/tools/glob.py new file mode 100644 index 000000000000..3aaa3eccb7e2 --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/glob.py @@ -0,0 +1,44 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import GLOB_TOOL_NAME + +_GLOB_DESCRIPTION = """Fast file pattern matching tool that works with any codebase size. 
+ +Usage: +- Supports glob patterns like "**/*.js", "*.py", "src/**/*.ts" +- Returns matching file paths sorted by modification time (most recent first) +- Use this tool when you need to find files by name patterns +- Results are limited to 100 files; use more specific patterns for large codebases + +Parameters: +- pattern: The glob pattern to match files against (e.g., "**/*.py", "*.ts") +- path: Optional directory to search in. Defaults to the workspace root. + +Examples: +- Find all Python files: pattern="**/*.py" +- Find TypeScript files in src: pattern="*.ts", path="/workspace/src" +- Find test files: pattern="**/*_test.py" or pattern="**/test_*.py" +""" + +GlobTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=GLOB_TOOL_NAME, + description=_GLOB_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['pattern'], + 'properties': { + 'pattern': { + 'type': 'string', + 'description': 'The glob pattern to match files against (e.g., "**/*.py", "*.ts", "src/**/*.js")', + }, + 'path': { + 'type': 'string', + 'description': 'Optional directory to search in. Defaults to workspace root if not specified.', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/codeact_agent/tools/grep.py b/openhands/agenthub/codeact_agent/tools/grep.py new file mode 100644 index 000000000000..a19469989d4c --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/grep.py @@ -0,0 +1,50 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import GREP_TOOL_NAME + +_GREP_DESCRIPTION = """Fast content search tool that works with any codebase size. 
+ +Usage: +- Searches file contents using regular expressions +- Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+", "import.*from") +- Filter files by pattern with the include parameter (e.g., "*.js", "*.{ts,tsx}") +- Returns file paths and line numbers with matching content, sorted by modification time +- Results are limited to 100 matches; use more specific patterns for large codebases + +Parameters: +- pattern: The regex pattern to search for in file contents +- path: Optional directory to search in. Defaults to workspace root. +- include: Optional file pattern to filter which files to search (e.g., "*.py", "*.{js,ts}") + +Examples: +- Find TODO comments: pattern="TODO|FIXME" +- Find function definitions: pattern="def\\s+\\w+\\(" +- Find imports in Python files: pattern="^import|^from.*import", include="*.py" +""" + +GrepTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=GREP_TOOL_NAME, + description=_GREP_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['pattern'], + 'properties': { + 'pattern': { + 'type': 'string', + 'description': 'The regex pattern to search for in file contents', + }, + 'path': { + 'type': 'string', + 'description': 'Optional directory to search in. Defaults to workspace root.', + }, + 'include': { + 'type': 'string', + 'description': 'Optional file pattern to filter files (e.g., "*.py", "*.{js,ts}")', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/codeact_agent/tools/list_dir.py b/openhands/agenthub/codeact_agent/tools/list_dir.py new file mode 100644 index 000000000000..4c96d9208852 --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/list_dir.py @@ -0,0 +1,44 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import LIST_DIR_TOOL_NAME + +_LIST_DIR_DESCRIPTION = """Lists files and directories in a given path. 
+ +Usage: +- Lists files and directories with a tree-like structure +- Automatically ignores common non-essential directories (node_modules, .git, __pycache__, etc.) +- Results are limited to 100 entries; use more specific paths for large directories +- Prefer the Glob and Grep tools if you know which files to search for + +Parameters: +- path: The absolute path to the directory to list. Defaults to workspace root if not specified. +- ignore: Optional list of additional glob patterns to ignore (e.g., ["*.log", "temp/"]) + +Examples: +- List workspace root: (no parameters needed) +- List specific directory: path="/workspace/src" +- List with custom ignores: path="/workspace", ignore=["*.log", "build/"] +""" + +ListDirTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=LIST_DIR_TOOL_NAME, + description=_LIST_DIR_DESCRIPTION, + parameters={ + 'type': 'object', + 'properties': { + 'path': { + 'type': 'string', + 'description': 'The absolute path to the directory to list. Defaults to workspace root.', + }, + 'ignore': { + 'type': 'array', + 'items': {'type': 'string'}, + 'description': 'Optional list of glob patterns to ignore (e.g., ["*.log", "temp/"])', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/codeact_agent/tools/opencode_editor.py b/openhands/agenthub/codeact_agent/tools/opencode_editor.py new file mode 100644 index 000000000000..ec5bd0ef62a2 --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/opencode_editor.py @@ -0,0 +1,228 @@ +""" +OpenCode-style editor that extends OHEditor with fuzzy matching replacers. +Based on the OpenCode edit.ts implementation. + +INSTALLATION: +To use this editor instead of the default OHEditor, modify the runtime's +file_editor initialization: + +1. In openhands/runtime/action_execution_server.py, change: + from openhands_aci.editor import OHEditor + to: + from openhands.agenthub.codeact_agent.tools.opencode_editor import OpenCodeEditor as OHEditor + +2. 
In openhands/runtime/impl/cli/cli_runtime.py, change: + from openhands_aci.editor import OHEditor + to: + from openhands.agenthub.codeact_agent.tools.opencode_editor import OpenCodeEditor as OHEditor + +This enables fuzzy matching for string replacements, making the editor more +robust when dealing with whitespace differences, indentation variations, etc. + +References: +- https://github.com/cline/cline/blob/main/evals/diff-edits/diff-apply/diff-06-23-25.ts +- https://github.com/google-gemini/gemini-cli/blob/main/packages/core/src/utils/editCorrector.ts +""" + +import re +from pathlib import Path +from typing import Generator + +from openhands_aci.editor import OHEditor +from openhands_aci.editor.results import CLIResult +from openhands_aci.editor.exceptions import ToolError +from openhands_aci.editor.config import SNIPPET_CONTEXT_WINDOW + +# Import the fuzzy matching functions from opencode_impl +from openhands.agenthub.codeact_agent.tools.opencode_impl import ( + simple_replacer, + line_trimmed_replacer, + block_anchor_replacer, + whitespace_normalized_replacer, + indentation_flexible_replacer, + escape_normalized_replacer, + trimmed_boundary_replacer, + context_aware_replacer, + multi_occurrence_replacer, +) + + +def fuzzy_find_match(content: str, old_str: str) -> str | None: + """ + Try to find a match using fuzzy matching replacers. + Returns the matched string if found (unique match), None otherwise. 
+ """ + replacers = [ + simple_replacer, + line_trimmed_replacer, + block_anchor_replacer, + whitespace_normalized_replacer, + indentation_flexible_replacer, + escape_normalized_replacer, + trimmed_boundary_replacer, + context_aware_replacer, + multi_occurrence_replacer, + ] + + for replacer in replacers: + matches = list(replacer(content, old_str)) + for search in matches: + index = content.find(search) + if index == -1: + continue + + last_index = content.rfind(search) + # Only return if unique match + if index == last_index: + return search + + return None + + +class OpenCodeEditor(OHEditor): + """ + Extended OHEditor with OpenCode-style fuzzy matching for str_replace. + Falls back to fuzzy matching when exact match fails. + + This class is a drop-in replacement for OHEditor that adds fuzzy + string matching capabilities based on the OpenCode implementation. + + Fuzzy matching strategies (in order of priority): + 1. Simple exact match + 2. Line-trimmed matching (ignores leading/trailing whitespace per line) + 3. Block anchor matching (matches by first/last line with similarity check) + 4. Whitespace-normalized matching + 5. Indentation-flexible matching + 6. Escape-normalized matching + 7. Trimmed boundary matching + 8. Context-aware matching + 9. Multi-occurrence handling + + Usage: + # In your runtime initialization, replace: + self.file_editor = OHEditor(...) + # With: + from openhands.agenthub.codeact_agent.tools.opencode_editor import OpenCodeEditor + self.file_editor = OpenCodeEditor(...) + """ + + def str_replace( + self, + path: Path, + old_str: str, + new_str: str | None, + enable_linting: bool, + encoding: str = 'utf-8', + ) -> CLIResult: + """ + Override str_replace to add fuzzy matching fallback. + + First tries exact match (like OHEditor), then tries fuzzy matching + if exact match fails. 
+ + Args: + path: Path to the file to edit + old_str: String to find and replace + new_str: Replacement string (None means empty string) + enable_linting: Whether to run linting after edit + encoding: File encoding (auto-detected if not specified) + + Returns: + CLIResult with edit operation results + """ + self.validate_file(path) + new_str = new_str or '' + + file_content = self.read_file(path) + + # First, try exact match (OHEditor behavior) + pattern = re.escape(old_str) + occurrences = [ + ( + file_content.count('\n', 0, match.start()) + 1, + match.group(), + match.start(), + ) + for match in re.finditer(pattern, file_content) + ] + + # If no exact match, try stripping whitespace (OHEditor fallback) + actual_old_str = old_str + actual_new_str = new_str + if not occurrences: + old_str_stripped = old_str.strip() + new_str_stripped = new_str.strip() + pattern = re.escape(old_str_stripped) + occurrences = [ + ( + file_content.count('\n', 0, match.start()) + 1, + match.group(), + match.start(), + ) + for match in re.finditer(pattern, file_content) + ] + if occurrences: + actual_old_str = old_str_stripped + actual_new_str = new_str_stripped + + # If still no match, try fuzzy matching (OpenCode behavior) + if not occurrences: + fuzzy_match = fuzzy_find_match(file_content, old_str) + if fuzzy_match: + idx = file_content.find(fuzzy_match) + if idx != -1: + line_num = file_content.count('\n', 0, idx) + 1 + occurrences = [(line_num, fuzzy_match, idx)] + + if not occurrences: + raise ToolError( + f'No replacement was performed, old_str `{old_str}` did not appear verbatim in {path}.' + ) + + if len(occurrences) > 1: + line_numbers = sorted(set(line for line, _, _ in occurrences)) + raise ToolError( + f'No replacement was performed. Multiple occurrences of old_str `{old_str}` in lines {line_numbers}. Please ensure it is unique.' 
+ ) + + # We found exactly one occurrence + replacement_line, matched_text, idx = occurrences[0] + + # Create new content + new_file_content = ( + file_content[:idx] + actual_new_str + file_content[idx + len(matched_text):] + ) + + # Write the new content + self.write_file(path, new_file_content) + + # Save to history + self._history_manager.add_history(path, file_content) + + # Create snippet + start_line = max(0, replacement_line - SNIPPET_CONTEXT_WINDOW) + end_line = replacement_line + SNIPPET_CONTEXT_WINDOW + actual_new_str.count('\n') + + snippet = self.read_file(path, start_line=start_line + 1, end_line=end_line) + + success_message = f'The file {path} has been edited. ' + success_message += self._make_output( + snippet, f'a snippet of {path}', start_line + 1 + ) + + if enable_linting: + lint_results = self._run_linting( + file_content, new_file_content, path + ) + success_message += '\n' + lint_results + '\n' + + success_message += 'Review the changes and make sure they are as expected. Edit the file again if necessary.' + + return CLIResult( + output=success_message, + prev_exist=True, + path=str(path), + old_content=file_content, + new_content=new_file_content, + ) + diff --git a/openhands/agenthub/codeact_agent/tools/opencode_impl.py b/openhands/agenthub/codeact_agent/tools/opencode_impl.py new file mode 100644 index 000000000000..adac8c5d24fe --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/opencode_impl.py @@ -0,0 +1,760 @@ +""" +Exact Python implementations of OpenCode tools. +These implementations match the TypeScript OpenCode implementations. 
+""" + +import os +import re +from pathlib import Path +from typing import Generator + +# ============================================================================ +# Constants +# ============================================================================ + +DEFAULT_READ_LIMIT = 2000 +MAX_LINE_LENGTH = 2000 +MAX_BYTES = 50 * 1024 # 50KB + +BINARY_EXTENSIONS = { + '.zip', '.tar', '.gz', '.exe', '.dll', '.so', '.class', '.jar', '.war', + '.7z', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.odt', '.ods', + '.odp', '.bin', '.dat', '.obj', '.o', '.a', '.lib', '.wasm', '.pyc', '.pyo' +} + +IGNORE_PATTERNS = [ + 'node_modules/', '__pycache__/', '.git/', 'dist/', 'build/', 'target/', + 'vendor/', 'bin/', 'obj/', '.idea/', '.vscode/', '.zig-cache/', 'zig-out', + '.coverage', 'coverage/', 'tmp/', 'temp/', '.cache/', 'cache/', 'logs/', + '.venv/', 'venv/', 'env/' +] + +# Similarity thresholds for block anchor fallback matching +SINGLE_CANDIDATE_SIMILARITY_THRESHOLD = 0.0 +MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD = 0.3 + +# ============================================================================ +# Read Tool Implementation +# ============================================================================ + + +def is_binary_file(filepath: str) -> bool: + """Check if a file is binary.""" + ext = os.path.splitext(filepath)[1].lower() + if ext in BINARY_EXTENSIONS: + return True + + try: + with open(filepath, 'rb') as f: + chunk = f.read(4096) + if not chunk: + return False + + # Check for null bytes + if b'\x00' in chunk: + return True + + # Count non-printable characters + non_printable = sum( + 1 for byte in chunk + if byte < 9 or (byte > 13 and byte < 32) + ) + + # If >30% non-printable characters, consider it binary + return non_printable / len(chunk) > 0.3 + except Exception: + return False + + +def read_file_opencode( + filepath: str, + offset: int = 0, + limit: int = DEFAULT_READ_LIMIT +) -> str: + """ + Read a file with OpenCode-style formatting. 
+ Returns formatted output with line numbers (5-digit padded). + """ + # Make path absolute + if not os.path.isabs(filepath): + filepath = os.path.join(os.getcwd(), filepath) + + # Check if file exists + if not os.path.exists(filepath): + # Try to find suggestions + directory = os.path.dirname(filepath) + basename = os.path.basename(filepath) + + if os.path.isdir(directory): + entries = os.listdir(directory) + suggestions = [ + os.path.join(directory, entry) + for entry in entries + if basename.lower() in entry.lower() or entry.lower() in basename.lower() + ][:3] + + if suggestions: + return f"File not found: {filepath}\n\nDid you mean one of these?\n" + "\n".join(suggestions) + + return f"File not found: {filepath}" + + # Check if binary + if is_binary_file(filepath): + return f"Cannot read binary file: {filepath}" + + # Read file + with open(filepath, 'r', encoding='utf-8', errors='replace') as f: + lines = f.read().split('\n') + + # Process lines with offset and limit + raw = [] + total_bytes = 0 + truncated_by_bytes = False + + for i in range(offset, min(len(lines), offset + limit)): + line = lines[i] + if len(line) > MAX_LINE_LENGTH: + line = line[:MAX_LINE_LENGTH] + "..." + + line_bytes = len(line.encode('utf-8')) + (1 if raw else 0) + if total_bytes + line_bytes > MAX_BYTES: + truncated_by_bytes = True + break + + raw.append(line) + total_bytes += line_bytes + + # Format with line numbers (5-digit padded with |) + content = [ + f"{str(i + offset + 1).zfill(5)}| {line}" + for i, line in enumerate(raw) + ] + + total_lines = len(lines) + last_read_line = offset + len(raw) + has_more_lines = total_lines > last_read_line + truncated = has_more_lines or truncated_by_bytes + + output = "\n" + output += "\n".join(content) + + if truncated_by_bytes: + output += f"\n\n(Output truncated at {MAX_BYTES} bytes. Use 'offset' parameter to read beyond line {last_read_line})" + elif has_more_lines: + output += f"\n\n(File has more lines. 
Use 'offset' parameter to read beyond line {last_read_line})")
    else:
        output += f"\n\n(End of file - total {total_lines} lines)"

    output += "\n"
    return output


# ============================================================================
# Edit Tool Implementation - Fuzzy Matching Replacers
# ============================================================================


def levenshtein(a: str, b: str) -> int:
    """Return the Levenshtein (edit) distance between *a* and *b*.

    Classic full-matrix dynamic programming: O(len(a) * len(b)) time and
    space. Used by the block-anchor replacer below to score how similar
    two trimmed lines are.
    """
    # If either side is empty the distance is just the other side's length.
    if a == "" or b == "":
        return max(len(a), len(b))

    matrix = [[0] * (len(b) + 1) for _ in range(len(a) + 1)]

    # Base cases: transforming to/from an empty prefix costs its length.
    for i in range(len(a) + 1):
        matrix[i][0] = i
    for j in range(len(b) + 1):
        matrix[0][j] = j

    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            cost = 0 if a[i - 1] == b[j - 1] else 1
            matrix[i][j] = min(
                matrix[i - 1][j] + 1,  # deletion
                matrix[i][j - 1] + 1,  # insertion
                matrix[i - 1][j - 1] + cost  # substitution (or match)
            )

    return matrix[len(a)][len(b)]


def simple_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Candidate generator for the exact-match case: yields *find* itself."""
    yield find


def line_trimmed_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield blocks of *content* whose lines equal *find*'s lines once trimmed.

    Character offsets are reconstructed from line lengths so the yielded
    text is an exact substring of *content*, keeping its original
    indentation (the caller replaces it by position).
    """
    original_lines = content.split('\n')
    search_lines = find.split('\n')

    # A trailing newline in *find* produces an empty last element; drop it
    # so it does not have to match an empty line in the file.
    if search_lines and search_lines[-1] == '':
        search_lines.pop()

    for i in range(len(original_lines) - len(search_lines) + 1):
        matches = True

        for j in range(len(search_lines)):
            original_trimmed = original_lines[i + j].strip()
            search_trimmed = search_lines[j].strip()

            if original_trimmed != search_trimmed:
                matches = False
                break

        if matches:
            # Start offset = lengths of all preceding lines plus their '\n's.
            match_start_index = sum(len(original_lines[k]) + 1 for k in range(i))
            match_end_index = match_start_index
            for k in range(len(search_lines)):
                match_end_index += len(original_lines[i + k])
                if k < len(search_lines) - 1:
                    match_end_index += 1  # '\n' between lines, none after the last

            yield content[match_start_index:match_end_index]


def block_anchor_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Match a block by its first/last lines, scoring the interior lines.

    The first and last lines of *find* act as anchors (compared trimmed).
    The interior is scored with a Levenshtein-based similarity in [0, 1].
    A lone candidate only needs SINGLE_CANDIDATE_SIMILARITY_THRESHOLD;
    with several candidates the best one must reach
    MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD.
    """
    original_lines = content.split('\n')
    search_lines = find.split('\n')

    # Anchoring needs at least first line + one interior line + last line.
    if len(search_lines) < 3:
        return

    if search_lines and search_lines[-1] == '':
        search_lines.pop()

    first_line_search = search_lines[0].strip()
    last_line_search = search_lines[-1].strip()
    search_block_size = len(search_lines)

    # Collect all candidate positions
    candidates = []
    for i in range(len(original_lines)):
        if original_lines[i].strip() != first_line_search:
            continue

        # Closing anchor must be at least two lines below the opening one,
        # and only the *first* closing anchor per opening anchor is taken.
        for j in range(i + 2, len(original_lines)):
            if original_lines[j].strip() == last_line_search:
                candidates.append({'start_line': i, 'end_line': j})
                break

    if not candidates:
        return

    # Handle single candidate
    if len(candidates) == 1:
        start_line = candidates[0]['start_line']
        end_line = candidates[0]['end_line']
        actual_block_size = end_line - start_line + 1

        similarity = 0
        lines_to_check = min(search_block_size - 2, actual_block_size - 2)

        if lines_to_check > 0:
            for j in range(1, min(search_block_size - 1, actual_block_size - 1)):
                original_line = original_lines[start_line + j].strip()
                search_line = search_lines[j].strip()
                max_len = max(len(original_line), len(search_line))
                if max_len == 0:
                    continue
                distance = levenshtein(original_line, search_line)
                similarity += (1 - distance / max_len) / lines_to_check

                # NOTE(review): with the threshold at 0.0 this breaks after
                # the first scored line — each term is >= 0, so the partial
                # sum already passes the acceptance test below. Deliberate
                # early-out, not a full average (unlike the multi-candidate
                # branch).
                if similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD:
                    break
        else:
            # No interior lines to compare: trust the anchors alone.
            similarity = 1.0

        if similarity >= SINGLE_CANDIDATE_SIMILARITY_THRESHOLD:
            match_start_index = sum(len(original_lines[k]) + 1 for k in range(start_line))
            match_end_index = match_start_index
            for k in range(start_line, end_line + 1):
                match_end_index += len(original_lines[k])
                if k < end_line:
                    match_end_index += 1
            yield content[match_start_index:match_end_index]
        return

    # Multiple candidates - find best match
    best_match = None
    max_similarity = -1

    for candidate in candidates:
        start_line = candidate['start_line']
        end_line = candidate['end_line']
        actual_block_size = end_line - start_line + 1

        similarity = 0
        lines_to_check = min(search_block_size - 2, actual_block_size - 2)

        if lines_to_check > 0:
            # Full average over the interior lines actually compared.
            for j in range(1, min(search_block_size - 1, actual_block_size - 1)):
                original_line = original_lines[start_line + j].strip()
                search_line = search_lines[j].strip()
                max_len = max(len(original_line), len(search_line))
                if max_len == 0:
                    continue
                distance = levenshtein(original_line, search_line)
                similarity += 1 - distance / max_len
            similarity /= lines_to_check
        else:
            similarity = 1.0

        if similarity > max_similarity:
            max_similarity = similarity
            best_match = candidate

    if max_similarity >= MULTIPLE_CANDIDATES_SIMILARITY_THRESHOLD and best_match:
        start_line = best_match['start_line']
        end_line = best_match['end_line']
        match_start_index = sum(len(original_lines[k]) + 1 for k in range(start_line))
        match_end_index = match_start_index
        for k in range(start_line, end_line + 1):
            match_end_index += len(original_lines[k])
            if k < end_line:
                match_end_index += 1
        yield content[match_start_index:match_end_index]


def whitespace_normalized_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Match with whitespace runs collapsed to a single space on both sides."""
    def normalize_whitespace(text: str) -> str:
        return re.sub(r'\s+', ' ', text).strip()

    normalized_find = normalize_whitespace(find)
    lines = content.split('\n')

    for i, line in enumerate(lines):
        if normalize_whitespace(line) == normalized_find:
            yield line
        else:
            # Partial-line hit: rebuild a whitespace-tolerant regex from the
            # words of *find* and yield the exact matched slice of the line.
            normalized_line = normalize_whitespace(line)
            if normalized_find in normalized_line:
                words = find.strip().split()
                if words:
                    pattern = r'\s+'.join(re.escape(word) for word in words)
                    try:
                        match = re.search(pattern, line)
                        if match:
                            yield match.group(0)
                    except re.error:
                        pass

    # Handle multi-line matches
    find_lines = find.split('\n')
    if 
def indentation_flexible_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield blocks of *content* equal to *find* once leading indentation is ignored."""

    def _dedent(text: str) -> str:
        # Strip the common leading-space count of the non-blank lines;
        # blank/whitespace-only lines pass through untouched.
        rows = text.split('\n')
        margins = [len(r) - len(r.lstrip()) for r in rows if r.strip()]
        if not margins:
            return text
        margin = min(margins)
        return '\n'.join(r[margin:] if r.strip() else r for r in rows)

    target = _dedent(find)
    hay_rows = content.split('\n')
    span = len(find.split('\n'))

    for start in range(len(hay_rows) - span + 1):
        candidate = '\n'.join(hay_rows[start:start + span])
        if _dedent(candidate) == target:
            yield candidate


def escape_normalized_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield matches after decoding common backslash escapes in the search text."""

    # Applied strictly in this order; '\\\\' must come near the end so the
    # shorter sequences are decoded first.
    _TABLE = (
        ('\\n', '\n'), ('\\t', '\t'), ('\\r', '\r'),
        ("\\'", "'"), ('\\"', '"'), ('\\`', '`'),
        ('\\\\', '\\'), ('\\$', '$'),
    )

    def _decode(text: str) -> str:
        for raw, cooked in _TABLE:
            text = text.replace(raw, cooked)
        return text

    wanted = _decode(find)

    # Direct hit: the decoded needle occurs verbatim in the content.
    if wanted in content:
        yield wanted

    # Window scan: a block of content lines that decodes to the same text.
    rows = content.split('\n')
    height = len(wanted.split('\n'))
    for start in range(len(rows) - height + 1):
        block = '\n'.join(rows[start:start + height])
        if _decode(block) == wanted:
            yield block


def multi_occurrence_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield *find* once per non-overlapping occurrence in *content*."""
    pos = content.find(find)
    while pos != -1:
        yield find
        pos = content.find(find, pos + len(find))


def trimmed_boundary_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield matches for *find* with its outer whitespace stripped.

    Does nothing when *find* has no leading/trailing whitespace to strip
    (the simple replacer already covers that case).
    """
    core = find.strip()
    if core == find:
        return

    if core in content:
        yield core

    rows = content.split('\n')
    height = len(find.split('\n'))
    for start in range(len(rows) - height + 1):
        block = '\n'.join(rows[start:start + height])
        if block.strip() == core:
            yield block


def context_aware_replacer(content: str, find: str) -> Generator[str, None, None]:
    """Yield blocks anchored by *find*'s first and last lines.

    Interior lines only need to agree on at least half of the non-blank
    rows, which tolerates small drift between the search text and the file.
    """
    needle = find.split('\n')
    if len(needle) < 3:
        return
    if needle and needle[-1] == '':
        needle.pop()

    hay = content.split('\n')
    top = needle[0].strip()
    bottom = needle[-1].strip()

    for start, row in enumerate(hay):
        if row.strip() != top:
            continue

        # First closing anchor at least two lines below the opening one;
        # later closing anchors for the same opening are never considered.
        end = next(
            (j for j in range(start + 2, len(hay)) if hay[j].strip() == bottom),
            None,
        )
        if end is None:
            continue

        window = hay[start:end + 1]
        if len(window) != len(needle):
            continue

        considered = 0
        agreed = 0
        for got, want in zip(window[1:-1], needle[1:-1]):
            got, want = got.strip(), want.strip()
            if got or want:
                considered += 1
                agreed += int(got == want)

        if considered == 0 or agreed / considered >= 0.5:
            yield '\n'.join(window)
+ """ + if old_string == new_string: + raise ValueError("oldString and newString must be different") + + not_found = True + + replacers = [ + simple_replacer, + line_trimmed_replacer, + block_anchor_replacer, + whitespace_normalized_replacer, + indentation_flexible_replacer, + escape_normalized_replacer, + trimmed_boundary_replacer, + context_aware_replacer, + multi_occurrence_replacer, + ] + + for replacer in replacers: + for search in replacer(content, old_string): + index = content.find(search) + if index == -1: + continue + + not_found = False + + if replace_all: + return content.replace(search, new_string) + + last_index = content.rfind(search) + if index != last_index: + continue + + return content[:index] + new_string + content[index + len(search):] + + if not_found: + raise ValueError("oldString not found in content") + + raise ValueError( + "Found multiple matches for oldString. Provide more surrounding lines " + "in oldString to identify the correct match." + ) + + +def edit_file_opencode( + filepath: str, + old_string: str, + new_string: str, + replace_all: bool = False +) -> str: + """ + Edit a file with OpenCode-style fuzzy matching. + Returns status message. + """ + # Make path absolute + if not os.path.isabs(filepath): + filepath = os.path.join(os.getcwd(), filepath) + + # Handle empty old_string (create/overwrite file) + if old_string == "": + with open(filepath, 'w', encoding='utf-8') as f: + f.write(new_string) + return "Edit applied successfully (file created/overwritten)." 
+ + # Check file exists + if not os.path.exists(filepath): + raise FileNotFoundError(f"File {filepath} not found") + + if os.path.isdir(filepath): + raise IsADirectoryError(f"Path is a directory, not a file: {filepath}") + + # Read file + with open(filepath, 'r', encoding='utf-8') as f: + content = f.read() + + # Apply replacement with fuzzy matching + new_content = replace_with_fuzzy_matching(content, old_string, new_string, replace_all) + + # Write file + with open(filepath, 'w', encoding='utf-8') as f: + f.write(new_content) + + return "Edit applied successfully." + + +# ============================================================================ +# Glob Tool Implementation +# ============================================================================ + + +def glob_files_opencode(pattern: str, search_path: str = '.') -> str: + """ + Find files matching a glob pattern, sorted by modification time. + Returns formatted output. + """ + import fnmatch + + # Make path absolute + if not os.path.isabs(search_path): + search_path = os.path.join(os.getcwd(), search_path) + + if not os.path.isdir(search_path): + return f"Directory not found: {search_path}" + + limit = 100 + files = [] + truncated = False + + # Walk directory and match files + for root, _, filenames in os.walk(search_path): + for filename in filenames: + if fnmatch.fnmatch(filename, pattern): + full_path = os.path.join(root, filename) + try: + mtime = os.path.getmtime(full_path) + except OSError: + mtime = 0 + files.append({'path': full_path, 'mtime': mtime}) + + if len(files) >= limit: + truncated = True + break + + if truncated: + break + + # Sort by modification time (newest first) + files.sort(key=lambda x: x['mtime'], reverse=True) + + # Build output + if not files: + return "No files found" + + output = [f['path'] for f in files] + + if truncated: + output.append("") + output.append("(Results are truncated. 
Consider using a more specific path or pattern.)") + + return '\n'.join(output) + + +# ============================================================================ +# List Tool Implementation +# ============================================================================ + + +def list_dir_opencode(search_path: str = '.', ignore_patterns: list = None) -> str: + """ + List directory contents in tree structure. + Returns formatted output. + """ + if ignore_patterns is None: + ignore_patterns = IGNORE_PATTERNS + + # Make path absolute + if not os.path.isabs(search_path): + search_path = os.path.join(os.getcwd(), search_path) + + if not os.path.isdir(search_path): + return f"Directory not found: {search_path}" + + limit = 100 + files = [] + + # Collect files + for root, dirs, filenames in os.walk(search_path): + # Filter out ignored directories + rel_root = os.path.relpath(root, search_path) + skip = False + for pattern in ignore_patterns: + pattern_clean = pattern.rstrip('/') + if pattern_clean in rel_root or rel_root.startswith(pattern_clean): + skip = True + break + + if skip: + dirs[:] = [] # Don't descend into ignored directories + continue + + # Filter directories in-place + dirs[:] = [ + d for d in dirs + if not any( + d == p.rstrip('/') or d.startswith(p.rstrip('/')) + for p in ignore_patterns + ) + ] + + for filename in filenames: + rel_path = os.path.relpath(os.path.join(root, filename), search_path) + files.append(rel_path) + if len(files) >= limit: + break + + if len(files) >= limit: + break + + # Build directory structure + dirs_set = set() + files_by_dir = {} + + for file in files: + dir_path = os.path.dirname(file) + parts = dir_path.split(os.sep) if dir_path != '.' and dir_path else [] + + # Add all parent directories + for i in range(len(parts) + 1): + dir_p = os.sep.join(parts[:i]) if i > 0 else '.' + dirs_set.add(dir_p) + + # Add file to its directory + dir_key = dir_path if dir_path else '.' 
+ if dir_key not in files_by_dir: + files_by_dir[dir_key] = [] + files_by_dir[dir_key].append(os.path.basename(file)) + + def render_dir(dir_path: str, depth: int) -> str: + indent = ' ' * depth + output = '' + + if depth > 0: + output += f"{indent}{os.path.basename(dir_path)}/\n" + + child_indent = ' ' * (depth + 1) + + # Get child directories + children = sorted([ + d for d in dirs_set + if os.path.dirname(d) == dir_path and d != dir_path + ]) + + # Render subdirectories first + for child in children: + output += render_dir(child, depth + 1) + + # Render files + dir_files = sorted(files_by_dir.get(dir_path, [])) + for f in dir_files: + output += f"{child_indent}{f}\n" + + return output + + output = f"{search_path}/\n" + render_dir('.', 0) + return output + + +# ============================================================================ +# Write Tool Implementation +# ============================================================================ + + +def write_file_opencode(filepath: str, content: str) -> str: + """ + Write content to a file. + Returns status message. + """ + # Make path absolute + if not os.path.isabs(filepath): + filepath = os.path.join(os.getcwd(), filepath) + + # Create directory if needed + directory = os.path.dirname(filepath) + if directory and not os.path.exists(directory): + os.makedirs(directory, exist_ok=True) + + # Write file + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + return "Wrote file successfully." + diff --git a/openhands/agenthub/codeact_agent/tools/read.py b/openhands/agenthub/codeact_agent/tools/read.py new file mode 100644 index 000000000000..365983836ae4 --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/read.py @@ -0,0 +1,53 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import READ_TOOL_NAME + +_READ_DESCRIPTION = """Reads a file from the local filesystem. 
+ +Usage: +- The file_path parameter must be an absolute path, not a relative path +- By default, reads up to 2000 lines starting from the beginning of the file +- You can optionally specify a line offset and limit (especially handy for long files) +- Any lines longer than 2000 characters will be truncated +- Results are returned with line numbers starting at 1 +- You can read multiple files in a single response by calling this tool multiple times +- If you read a file that exists but has empty contents, a warning will be shown +- This tool can also read image files (jpeg, png, gif, webp) + +Parameters: +- file_path: The absolute path to the file to read +- offset: Optional line number to start reading from (0-based, default: 0) +- limit: Optional number of lines to read (default: 2000) + +Examples: +- Read entire file: file_path="/workspace/src/main.py" +- Read from line 100: file_path="/workspace/src/main.py", offset=100 +- Read 50 lines from line 200: file_path="/workspace/src/main.py", offset=200, limit=50 +""" + +ReadTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=READ_TOOL_NAME, + description=_READ_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['file_path'], + 'properties': { + 'file_path': { + 'type': 'string', + 'description': 'The absolute path to the file to read', + }, + 'offset': { + 'type': 'integer', + 'description': 'Line number to start reading from (0-based, default: 0)', + }, + 'limit': { + 'type': 'integer', + 'description': 'Number of lines to read (default: 2000)', + }, + }, + }, + ), +) + diff --git a/openhands/agenthub/codeact_agent/tools/write.py b/openhands/agenthub/codeact_agent/tools/write.py new file mode 100644 index 000000000000..e3adb84235ce --- /dev/null +++ b/openhands/agenthub/codeact_agent/tools/write.py @@ -0,0 +1,45 @@ +from litellm import ChatCompletionToolParam, ChatCompletionToolParamFunctionChunk + +from openhands.llm.tool_names import WRITE_TOOL_NAME 
+ +_WRITE_DESCRIPTION = """Writes content to a file, creating it if it doesn't exist or overwriting if it does. + +Usage: +- The file_path parameter must be an absolute path +- Creates parent directories automatically if they don't exist +- Use this tool to create new files or completely replace existing file contents +- For partial edits to existing files, use the 'edit' tool instead + +Parameters: +- file_path: The absolute path to the file to write +- content: The content to write to the file + +Examples: +- Create new file: file_path="/workspace/src/new_file.py", content="print('hello')" +- Overwrite file: file_path="/workspace/config.json", content='{"key": "value"}' + +Note: This will overwrite existing files. Make sure to read the file first if you need to preserve any content. +""" + +WriteTool = ChatCompletionToolParam( + type='function', + function=ChatCompletionToolParamFunctionChunk( + name=WRITE_TOOL_NAME, + description=_WRITE_DESCRIPTION, + parameters={ + 'type': 'object', + 'required': ['file_path', 'content'], + 'properties': { + 'file_path': { + 'type': 'string', + 'description': 'The absolute path to the file to write', + }, + 'content': { + 'type': 'string', + 'description': 'The content to write to the file', + }, + }, + }, + ), +) + diff --git a/openhands/core/config/agent_config.py b/openhands/core/config/agent_config.py index 3c506c9382d2..713e1ad842e6 100644 --- a/openhands/core/config/agent_config.py +++ b/openhands/core/config/agent_config.py @@ -21,6 +21,12 @@ class AgentConfig(BaseModel): """The classpath of the agent to use. To be used for custom agents that are not defined in the openhands.agenthub package.""" system_prompt_filename: str = Field(default='system_prompt.j2') """Filename of the system prompt template file within the agent's prompt directory. Defaults to 'system_prompt.j2'.""" + custom_prompt_dir: str | None = Field(default=None) + """Optional custom directory containing prompt templates. 
If set, overrides the agent's default prompt directory.""" + system_prompt_path: str | None = Field(default=None) + """Optional absolute path to a custom system_prompt.j2 file. Takes precedence over custom_prompt_dir.""" + system_prompt_long_horizon_path: str | None = Field(default=None) + """Optional absolute path to a custom system_prompt_long_horizon.j2 file. Takes precedence over custom_prompt_dir.""" enable_browsing: bool = Field(default=True) """Whether to enable browsing tool. Note: If using CLIRuntime, browsing is not implemented and should be disabled.""" diff --git a/openhands/core/schema/action.py b/openhands/core/schema/action.py index 2aea98c7ff24..635ac2a0e470 100644 --- a/openhands/core/schema/action.py +++ b/openhands/core/schema/action.py @@ -103,3 +103,20 @@ class ActionType(str, Enum): VALIDATION_FAILURE = 'validation_failure' """Represents a validation failure for a function call.""" + + # OpenCode-style actions + GLOB = 'glob' + """Searches for files matching a glob pattern.""" + + GREP = 'grep' + """Searches file contents using a regex pattern.""" + + LIST_DIR = 'list_dir' + """Lists files and directories in a given path.""" + + OPENCODE_READ = 'opencode_read' + """Reads a file with OpenCode-style formatting (5-digit line numbers, binary detection, etc.).""" + + OPENCODE_WRITE = 'opencode_write' + """Writes a file with LSP diagnostics after write.""" + diff --git a/openhands/core/schema/observation.py b/openhands/core/schema/observation.py index 7f976fca3d8f..e317ba743d62 100644 --- a/openhands/core/schema/observation.py +++ b/openhands/core/schema/observation.py @@ -64,3 +64,20 @@ class ObservationType(str, Enum): VALIDATION_FAILURE = 'validation_failure' """Result of a validation failure for a function call""" + + # OpenCode-style observations + GLOB = 'glob' + """Result of a glob file search operation.""" + + GREP = 'grep' + """Result of a grep content search operation.""" + + LIST_DIR = 'list_dir' + """Result of a directory listing 
operation.""" + + OPENCODE_READ = 'opencode_read' + """Result of an OpenCode-style file read operation.""" + + OPENCODE_WRITE = 'opencode_write' + """Result of an OpenCode-style file write operation with diagnostics.""" + diff --git a/openhands/events/action/__init__.py b/openhands/events/action/__init__.py index 19b538c828e6..427c715f1ee7 100644 --- a/openhands/events/action/__init__.py +++ b/openhands/events/action/__init__.py @@ -24,6 +24,13 @@ ) from openhands.events.action.mcp import MCPAction from openhands.events.action.message import MessageAction, SystemMessageAction +from openhands.events.action.opencode import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) __all__ = [ 'Action', @@ -49,4 +56,10 @@ 'ActionSecurityRisk', 'LoopRecoveryAction', 'ValidationFailureAction', + # OpenCode-style actions + 'GlobAction', + 'GrepAction', + 'ListDirAction', + 'OpenCodeReadAction', + 'OpenCodeWriteAction', ] diff --git a/openhands/events/action/opencode.py b/openhands/events/action/opencode.py new file mode 100644 index 000000000000..374171e3bff9 --- /dev/null +++ b/openhands/events/action/opencode.py @@ -0,0 +1,181 @@ +"""OpenCode-inspired action classes for enhanced file operations. + +These actions provide OpenCode-compatible functionality with: +- Ripgrep integration (with fallback to standard tools) +- OpenCode-style output formatting +- LSP/linter diagnostics support +- Binary file detection +- .gitignore support +""" + +from dataclasses import dataclass, field +from typing import ClassVar + +from openhands.core.schema import ActionType +from openhands.events.action.action import Action, ActionSecurityRisk + + +@dataclass +class OpenCodeReadAction(Action): + """Reads a file with OpenCode-style formatting. 
+ + Features: + - 5-digit zero-padded line numbers with | separator (e.g., "00001| content") + - Binary file detection (by extension and content analysis) + - 50KB byte limit with truncation messages + - File suggestions when file not found + - 2000 character line length limit + + Attributes: + path: The path to the file to read + offset: Line number to start reading from (0-based). Default: 0 + limit: Number of lines to read. Default: 2000 + """ + + path: str + offset: int = 0 + limit: int = 2000 + thought: str = "" + action: str = ActionType.OPENCODE_READ + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN + + @property + def message(self) -> str: + if self.offset > 0: + return f"Reading file: {self.path} (from line {self.offset + 1})" + return f"Reading file: {self.path}" + + +@dataclass +class OpenCodeWriteAction(Action): + """Writes a file with LSP/linter diagnostics after write. + + Features: + - Creates parent directories if needed + - Runs appropriate linter based on file extension: + - Python: flake8 → pylint → py_compile + - JavaScript/TypeScript: eslint + - Go: go vet + - Rust: cargo check + - Outputs diagnostics in OpenCode format + + Attributes: + path: The path to the file to write + content: The content to write to the file + """ + + path: str + content: str + thought: str = "" + action: str = ActionType.OPENCODE_WRITE + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN + + @property + def message(self) -> str: + return f"Writing file: {self.path}" + + +@dataclass +class GlobAction(Action): + """Searches for files matching a glob pattern. + + Features: + - Uses ripgrep (respects .gitignore) with fallback to find + - Results sorted by modification time (newest first) + - Limited to 100 results + + Attributes: + pattern: The glob pattern to match files against (e.g., "*.py", "**/*.ts") + path: Directory to search in. Defaults to current directory. 
+ """ + + pattern: str + path: str = "." + thought: str = "" + action: str = ActionType.GLOB + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN + + @property + def message(self) -> str: + return f"Searching for files matching: {self.pattern}" + + +@dataclass +class GrepAction(Action): + """Searches file contents using a pattern. + + Features: + - Uses ripgrep (respects .gitignore) with fallback to grep + - Shows line numbers in output + - Limited to 100 results + + Attributes: + pattern: The pattern to search for in file contents + path: Directory to search in. Defaults to current directory. + include: Optional file pattern to filter which files to search (e.g., "*.py") + """ + + pattern: str + path: str = "." + include: str = "" + thought: str = "" + action: str = ActionType.GREP + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN + + @property + def message(self) -> str: + return f"Searching for pattern: {self.pattern}" + + +@dataclass +class ListDirAction(Action): + """Lists files and directories in a tree structure. + + Features: + - Uses ripgrep (respects .gitignore) with fallback to tree/find + - Builds tree structure output + - Default ignore patterns for common directories + - Limited to 100 files + + Attributes: + path: The directory to list. Defaults to current directory. + ignore: Additional glob patterns to ignore (beyond defaults) + + Default ignore patterns: + node_modules, __pycache__, .git, dist, build, target, + vendor, .venv, venv, .cache + """ + + path: str = "." 
+ ignore: list[str] = field(default_factory=list) + thought: str = "" + action: str = ActionType.LIST_DIR + runnable: ClassVar[bool] = True + security_risk: ActionSecurityRisk = ActionSecurityRisk.UNKNOWN + + # Default ignore patterns matching OpenCode behavior + DEFAULT_IGNORES: ClassVar[list[str]] = [ + "node_modules", + "__pycache__", + ".git", + "dist", + "build", + "target", + "vendor", + ".venv", + "venv", + ".cache", + ] + + @property + def all_ignores(self) -> list[str]: + """Returns combined default and custom ignore patterns.""" + return self.DEFAULT_IGNORES + self.ignore + + @property + def message(self) -> str: + return f"Listing directory: {self.path or 'current directory'}" diff --git a/openhands/events/serialization/action.py b/openhands/events/serialization/action.py index 98f2b89e6106..1d259932bdbf 100644 --- a/openhands/events/serialization/action.py +++ b/openhands/events/serialization/action.py @@ -27,6 +27,13 @@ ) from openhands.events.action.mcp import MCPAction from openhands.events.action.message import MessageAction, SystemMessageAction +from openhands.events.action.opencode import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) actions = ( NullAction, @@ -50,6 +57,12 @@ MCPAction, TaskTrackingAction, LoopRecoveryAction, + # OpenCode-style actions + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, ) ACTION_TYPE_TO_CLASS = {action_class.action: action_class for action_class in actions} # type: ignore[attr-defined] diff --git a/openhands/llm/tool_names.py b/openhands/llm/tool_names.py index 7b037847f2fb..b8a7488b37da 100644 --- a/openhands/llm/tool_names.py +++ b/openhands/llm/tool_names.py @@ -6,3 +6,11 @@ FINISH_TOOL_NAME = 'finish' LLM_BASED_EDIT_TOOL_NAME = 'edit_file' TASK_TRACKER_TOOL_NAME = 'task_tracker' + +# OpenCode-inspired tools +GLOB_TOOL_NAME = 'glob' +GREP_TOOL_NAME = 'grep' +LIST_DIR_TOOL_NAME = 'list_dir' +READ_TOOL_NAME = 'read' 
+WRITE_TOOL_NAME = 'write' +EDIT_TOOL_NAME = 'edit' diff --git a/openhands/memory/conversation_memory.py b/openhands/memory/conversation_memory.py index 22847365edd2..bbd0ea2af944 100644 --- a/openhands/memory/conversation_memory.py +++ b/openhands/memory/conversation_memory.py @@ -21,6 +21,13 @@ TaskTrackingAction, ) from openhands.events.action.mcp import MCPAction +from openhands.events.action.opencode import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) from openhands.events.action.message import SystemMessageAction from openhands.events.event import Event, RecallType from openhands.events.observation import ( @@ -32,6 +39,7 @@ FileDownloadObservation, FileEditObservation, FileReadObservation, + FileWriteObservation, IPythonRunCellObservation, LoopDetectionObservation, TaskTrackingObservation, @@ -232,6 +240,12 @@ def _process_action( BrowseURLAction, MCPAction, TaskTrackingAction, + # OpenCode-style actions + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, ), ) or (isinstance(action, CmdRunAction) and action.source == 'agent'): tool_metadata = action.tool_call_metadata @@ -481,6 +495,12 @@ def _process_observation( message = Message( role='user', content=[TextContent(text=obs.content)] ) # Content is already truncated by openhands-aci + elif isinstance(obs, FileWriteObservation): + text = truncate_content( + f'File written successfully: {obs.path}\n{obs.content}', + max_message_chars, + ) + message = Message(role='user', content=[TextContent(text=text)]) elif isinstance(obs, BrowserOutputObservation): text = obs.content content = [TextContent(text=text)] diff --git a/openhands/runtime/action_execution_server.py b/openhands/runtime/action_execution_server.py index ad0b4acf2ffa..1d7219eb089e 100644 --- a/openhands/runtime/action_execution_server.py +++ b/openhands/runtime/action_execution_server.py @@ -24,7 +24,12 @@ from fastapi.exceptions import RequestValidationError from 
fastapi.responses import FileResponse, JSONResponse from fastapi.security import APIKeyHeader -from openhands_aci.editor.editor import OHEditor +# Use OpenCodeEditor with fuzzy matching instead of default OHEditor +try: + from openhands.agenthub.codeact_agent.tools.opencode_editor import OpenCodeEditor as OHEditor +except ImportError: + # Fallback to standard OHEditor if OpenCodeEditor not available (e.g., in sandbox) + from openhands_aci.editor.editor import OHEditor from openhands_aci.editor.exceptions import ToolError from openhands_aci.editor.results import ToolResult from openhands_aci.utils.diff import get_diff @@ -45,7 +50,12 @@ FileEditAction, FileReadAction, FileWriteAction, + GlobAction, + GrepAction, IPythonRunCellAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, ) from openhands.events.event import FileEditSource, FileReadSource from openhands.events.observation import ( @@ -588,6 +598,451 @@ async def edit(self, action: FileEditAction) -> Observation: ), ) + # ========================================================================= + # OpenCode-style action handlers + # ========================================================================= + + async def opencode_read(self, action: OpenCodeReadAction) -> Observation: + """Execute OpenCode-style file read with 5-digit line numbers.""" + assert self.bash_session is not None + working_dir = self.bash_session.cwd + filepath = self._resolve_path(action.path, working_dir) + + # Constants matching OpenCode behavior + MAX_BYTES = 50 * 1024 # 50KB + MAX_LINE_LENGTH = 2000 + BINARY_EXTENSIONS = { + '.zip', '.tar', '.gz', '.exe', '.dll', '.so', '.class', '.jar', + '.war', '.7z', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', + '.bin', '.dat', '.obj', '.o', '.a', '.lib', '.wasm', '.pyc', '.pyo' + } + + # Check if file exists + if not os.path.exists(filepath): + # Try to find suggestions + directory = os.path.dirname(filepath) or '.' 
+ basename = os.path.basename(filepath) + + if os.path.isdir(directory): + try: + entries = os.listdir(directory) + suggestions = [ + os.path.join(directory, entry) + for entry in entries + if basename.lower() in entry.lower() or entry.lower() in basename.lower() + ][:3] + + if suggestions: + return ErrorObservation( + f"File not found: {filepath}\n\nDid you mean one of these?\n" + + "\n".join(suggestions) + ) + except OSError: + pass + + return ErrorObservation(f"File not found: {filepath}") + + # Check if directory + if os.path.isdir(filepath): + return ErrorObservation(f"Path is a directory: {filepath}. You can only read files") + + # Check binary by extension + ext = os.path.splitext(filepath)[1].lower() + if ext in BINARY_EXTENSIONS: + return ErrorObservation(f"Cannot read binary file: {filepath}") + + # Check binary by content + try: + with open(filepath, 'rb') as f: + chunk = f.read(4096) + if b'\x00' in chunk: + return ErrorObservation(f"Cannot read binary file: {filepath}") + if chunk: + non_printable = sum(1 for b in chunk if b < 9 or (b > 13 and b < 32)) + if non_printable / len(chunk) > 0.3: + return ErrorObservation(f"Cannot read binary file: {filepath}") + except Exception: + pass + + # Read file + try: + with open(filepath, 'r', encoding='utf-8', errors='replace') as f: + lines = f.read().split('\n') + except Exception as e: + return ErrorObservation(f"Error reading file: {e}") + + # Process lines with offset and limit + offset = action.offset + limit = action.limit + raw = [] + total_bytes = 0 + truncated_by_bytes = False + + for i in range(offset, min(len(lines), offset + limit)): + line = lines[i] + if len(line) > MAX_LINE_LENGTH: + line = line[:MAX_LINE_LENGTH] + "..." 
+ + line_bytes = len(line.encode('utf-8')) + (1 if raw else 0) + if total_bytes + line_bytes > MAX_BYTES: + truncated_by_bytes = True + break + + raw.append(line) + total_bytes += line_bytes + + # Format with 5-digit line numbers and | separator (OpenCode style) + content_lines = [ + f"{str(i + offset + 1).zfill(5)}| {line}" + for i, line in enumerate(raw) + ] + + total_lines = len(lines) + last_read_line = offset + len(raw) + has_more_lines = total_lines > last_read_line + truncated = has_more_lines or truncated_by_bytes + + output = "\n" + output += "\n".join(content_lines) + + if truncated_by_bytes: + output += f"\n\n(Output truncated at {MAX_BYTES} bytes. Use 'offset' parameter to read beyond line {last_read_line})" + elif has_more_lines: + output += f"\n\n(File has more lines. Use 'offset' parameter to read beyond line {last_read_line})" + else: + output += f"\n\n(End of file - total {total_lines} lines)" + + output += "\n" + + return CmdOutputObservation( + content=output, + command_id=-1, + command=f"opencode_read {filepath}", + ) + + async def opencode_write(self, action: OpenCodeWriteAction) -> Observation: + """Execute OpenCode-style file write with LSP diagnostics.""" + assert self.bash_session is not None + working_dir = self.bash_session.cwd + filepath = self._resolve_path(action.path, working_dir) + + # Create directory if needed + directory = os.path.dirname(filepath) + if directory and not os.path.exists(directory): + try: + os.makedirs(directory, exist_ok=True) + except OSError as e: + return ErrorObservation(f"Failed to create directory: {e}") + + # Write file + try: + with open(filepath, 'w', encoding='utf-8') as f: + f.write(action.content) + except Exception as e: + return ErrorObservation(f"Failed to write file: {e}") + + output = "Wrote file successfully." 
+ + # Run linter based on file extension + ext = os.path.splitext(filepath)[1].lower() + errors = [] + + try: + import subprocess + + if ext == '.py': + # Try flake8, pylint, py_compile in order + for linter_cmd in [ + ['flake8', '--max-line-length=120', filepath], + ['pylint', '--errors-only', filepath], + ['python3', '-m', 'py_compile', filepath], + ]: + try: + result = subprocess.run( + linter_cmd, capture_output=True, text=True, timeout=10 + ) + lint_output = result.stdout.strip() or result.stderr.strip() + if lint_output: + errors.extend(lint_output.split('\n')[:20]) + break + except (FileNotFoundError, subprocess.TimeoutExpired): + continue + + elif ext in ('.js', '.jsx', '.ts', '.tsx'): + try: + result = subprocess.run( + ['eslint', '--format=compact', filepath], + capture_output=True, text=True, timeout=10 + ) + if result.stdout.strip(): + errors.extend(result.stdout.strip().split('\n')[:20]) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + elif ext == '.go': + try: + result = subprocess.run( + ['go', 'vet', filepath], + capture_output=True, text=True, timeout=10 + ) + if result.stderr.strip(): + errors.extend(result.stderr.strip().split('\n')[:20]) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + elif ext == '.rs': + try: + result = subprocess.run( + ['cargo', 'check', '--message-format=short'], + capture_output=True, text=True, timeout=30 + ) + if result.stderr.strip(): + error_lines = [ + l for l in result.stderr.strip().split('\n') + if 'error' in l.lower() + ][:20] + errors.extend(error_lines) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + except Exception: + pass + + if errors: + output += f'\n\nLSP errors detected in this file, please fix:\n' + output += f'\n' + output += '\n'.join(errors) + output += '\n' + + return FileWriteObservation(content=output, path=filepath) + + async def glob(self, action: GlobAction) -> Observation: + """Execute glob file search using ripgrep or find.""" + assert 
self.bash_session is not None + working_dir = self.bash_session.cwd + search_path = self._resolve_path(action.path, working_dir) + + import subprocess + + files = [] + truncated = False + limit = 100 + + # Try ripgrep first (respects .gitignore, sorts by mtime) + try: + result = subprocess.run( + ['rg', '--files', '-g', action.pattern, '--sortr', 'modified', search_path], + capture_output=True, text=True, timeout=30, cwd=working_dir + ) + if result.returncode == 0 and result.stdout.strip(): + all_files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + if len(all_files) > limit: + truncated = True + files = all_files[:limit] + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + # Fallback to find with mtime sorting + if not files: + try: + result = subprocess.run( + ['find', search_path, '-type', 'f', '-name', action.pattern, + '-printf', '%T@ %p\n'], + capture_output=True, text=True, timeout=30, cwd=working_dir + ) + if result.stdout.strip(): + lines = result.stdout.strip().split('\n') + # Sort by mtime (first field) descending + sorted_lines = sorted(lines, key=lambda x: float(x.split()[0]) if x else 0, reverse=True) + all_files = [' '.join(l.split()[1:]) for l in sorted_lines if l] + if len(all_files) > limit: + truncated = True + files = all_files[:limit] + except (FileNotFoundError, subprocess.TimeoutExpired, ValueError): + pass + + # Build output + if not files: + output = "No files found" + else: + output = '\n'.join(files) + if truncated: + output += '\n\n(Results are truncated. 
Consider using a more specific path or pattern.)' + + return CmdOutputObservation( + content=output, + command_id=-1, + command=f"glob {action.pattern} {action.path}", + ) + + async def grep(self, action: GrepAction) -> Observation: + """Execute grep content search using ripgrep or grep.""" + assert self.bash_session is not None + working_dir = self.bash_session.cwd + search_path = self._resolve_path(action.path, working_dir) + + import subprocess + + output = "" + limit = 100 + + # Try ripgrep first (respects .gitignore) + try: + cmd = ['rg', '-n', action.pattern, search_path] + if action.include: + cmd = ['rg', '-n', '-g', action.include, action.pattern, search_path] + + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=30, cwd=working_dir + ) + if result.stdout.strip(): + lines = result.stdout.strip().split('\n') + if len(lines) > limit: + output = '\n'.join(lines[:limit]) + output += f'\n\n(Results truncated, showing {limit} of {len(lines)}+ matches)' + else: + output = '\n'.join(lines) + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + # Fallback to grep + if not output: + try: + if action.include: + # Use find + grep for file filtering + result = subprocess.run( + f'find {search_path} -type f -name "{action.include}" ' + f'-exec grep -Hn "{action.pattern}" {{}} \\; 2>/dev/null | head -{limit}', + shell=True, capture_output=True, text=True, timeout=30, cwd=working_dir + ) + else: + result = subprocess.run( + f'grep -rn "{action.pattern}" {search_path} 2>/dev/null | head -{limit}', + shell=True, capture_output=True, text=True, timeout=30, cwd=working_dir + ) + output = result.stdout.strip() or "No matches found" + except (subprocess.TimeoutExpired, Exception): + output = "No matches found" + + if not output: + output = "No matches found" + + return CmdOutputObservation( + content=output, + command_id=-1, + command=f"grep {action.pattern} {action.path}", + ) + + async def list_dir(self, action: ListDirAction) -> 
Observation: + """Execute directory listing with tree structure.""" + assert self.bash_session is not None + working_dir = self.bash_session.cwd + list_path = self._resolve_path(action.path, working_dir) + + import subprocess + + # Combine default and custom ignore patterns + all_ignores = action.all_ignores + + files = [] + limit = 100 + + # Try ripgrep first (respects .gitignore) + try: + cmd = ['rg', '--files'] + for pattern in all_ignores: + cmd.extend(['-g', f'!{pattern}/**']) + if list_path != '.': + cmd.append(list_path) + + result = subprocess.run( + cmd, capture_output=True, text=True, timeout=30, cwd=working_dir + ) + if result.returncode == 0 and result.stdout.strip(): + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()][:limit] + except (FileNotFoundError, subprocess.TimeoutExpired): + pass + + # Build tree structure if we have files + if files: + dirs = set() + files_by_dir = {} + + for f in files: + d = os.path.dirname(f) or '.' + parts = d.split(os.sep) if d != '.' else [] + + # Add all parent directories + for i in range(len(parts) + 1): + dir_p = os.sep.join(parts[:i]) if i > 0 else '.' 
+ dirs.add(dir_p) + + # Add file to its directory + if d not in files_by_dir: + files_by_dir[d] = [] + files_by_dir[d].append(os.path.basename(f)) + + def render_dir(dir_path: str, depth: int) -> str: + output = '' + if depth > 0: + output += ' ' * depth + os.path.basename(dir_path) + '/\n' + + # Get child directories + children = sorted([ + d for d in dirs + if os.path.dirname(d) == dir_path and d != dir_path + ]) + + # Render subdirectories first + for child in children: + output += render_dir(child, depth + 1) + + # Render files + for f in sorted(files_by_dir.get(dir_path, [])): + output += ' ' * (depth + 1) + f + '\n' + + return output + + abs_path = os.path.abspath(list_path) + output = f"{abs_path}/\n" + render_dir('.', 0) + else: + # Fallback to tree or find + try: + # Try tree command + ignore_args = [] + for p in all_ignores: + ignore_args.extend(['-I', p]) + + result = subprocess.run( + ['tree', '-L', '3', '--noreport'] + ignore_args + [list_path], + capture_output=True, text=True, timeout=10, cwd=working_dir + ) + output = result.stdout.strip() + except FileNotFoundError: + # Fallback to find + try: + result = subprocess.run( + ['find', list_path, '-maxdepth', '3', '-type', 'f'], + capture_output=True, text=True, timeout=10, cwd=working_dir + ) + lines = result.stdout.strip().split('\n') + # Filter out ignored patterns + filtered = [ + l for l in lines + if l and not any(p in l for p in all_ignores) + ][:limit] + output = '\n'.join(filtered) if filtered else 'No files found' + except Exception: + output = 'No files found' + except subprocess.TimeoutExpired: + output = 'Directory listing timed out' + + return CmdOutputObservation( + content=output, + command_id=-1, + command=f"list_dir {action.path}", + ) + async def browse(self, action: BrowseURLAction) -> Observation: if self.browser is None: return ErrorObservation( diff --git a/openhands/runtime/impl/action_execution/action_execution_client.py 
b/openhands/runtime/impl/action_execution/action_execution_client.py index f1066ea113eb..ee15740f172f 100644 --- a/openhands/runtime/impl/action_execution/action_execution_client.py +++ b/openhands/runtime/impl/action_execution/action_execution_client.py @@ -31,6 +31,13 @@ IPythonRunCellAction, ValidationFailureAction, ) +from openhands.events.action.opencode import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) from openhands.events.action.action import Action from openhands.events.action.files import FileEditSource from openhands.events.action.mcp import MCPAction @@ -370,6 +377,25 @@ def browse(self, action: BrowseURLAction) -> Observation: def browse_interactive(self, action: BrowseInteractiveAction) -> Observation: return self.send_action_for_execution(action) + # ========================================================================= + # OpenCode-style action handlers + # ========================================================================= + + def glob(self, action: GlobAction) -> Observation: + return self.send_action_for_execution(action) + + def grep(self, action: GrepAction) -> Observation: + return self.send_action_for_execution(action) + + def list_dir(self, action: ListDirAction) -> Observation: + return self.send_action_for_execution(action) + + def opencode_read(self, action: OpenCodeReadAction) -> Observation: + return self.send_action_for_execution(action) + + def opencode_write(self, action: OpenCodeWriteAction) -> Observation: + return self.send_action_for_execution(action) + def get_mcp_config( self, extra_stdio_servers: list[MCPStdioServerConfig] | None = None ) -> MCPConfig: diff --git a/openhands/runtime/impl/cli/cli_runtime.py b/openhands/runtime/impl/cli/cli_runtime.py index ae4752c2a825..7b5273fbe713 100644 --- a/openhands/runtime/impl/cli/cli_runtime.py +++ b/openhands/runtime/impl/cli/cli_runtime.py @@ -16,7 +16,12 @@ from typing import TYPE_CHECKING, Any, Callable from 
binaryornot.check import is_binary -from openhands_aci.editor.editor import OHEditor +# Use OpenCodeEditor with fuzzy matching instead of default OHEditor +try: + from openhands.agenthub.codeact_agent.tools.opencode_editor import OpenCodeEditor as OHEditor +except ImportError: + # Fallback to standard OHEditor if OpenCodeEditor not available + from openhands_aci.editor.editor import OHEditor from openhands_aci.editor.exceptions import ToolError from openhands_aci.editor.results import ToolResult from openhands_aci.utils.diff import get_diff diff --git a/openhands/utils/prompt.py b/openhands/utils/prompt.py index 85c010138f12..4b1cbef74e45 100644 --- a/openhands/utils/prompt.py +++ b/openhands/utils/prompt.py @@ -2,13 +2,68 @@ from dataclasses import dataclass, field from itertools import islice -from jinja2 import Environment, FileSystemLoader, Template +from jinja2 import BaseLoader, Environment, FileSystemLoader, Template, TemplateNotFound from openhands.controller.state.state import State from openhands.core.message import Message, TextContent from openhands.events.observation.agent import MicroagentKnowledge +class OverrideLoader(BaseLoader): + """A Jinja2 loader that allows specific templates to be loaded from custom paths. + + This loader checks if a template has a custom path override, and if so, loads from that path. + Otherwise, it falls back to loading from the default directory. + This allows {% include %} directives to work correctly while supporting custom template paths. + """ + + def __init__(self, default_dir: str, overrides: dict[str, str] | None = None): + """Initialize the loader. 
+ + Args: + default_dir: Default directory to load templates from + overrides: Dict mapping template names to absolute file paths + """ + self.default_dir = default_dir + self.overrides = overrides or {} + self.default_loader = FileSystemLoader(default_dir) + + # Build a list of directories for included templates + # This allows {% include %} to find overridden templates + self.include_dirs = [default_dir] + for path in self.overrides.values(): + if path and os.path.isfile(path): + dir_path = os.path.dirname(path) + if dir_path not in self.include_dirs: + self.include_dirs.append(dir_path) + + self.include_loader = FileSystemLoader(self.include_dirs) + + def get_source(self, environment, template): + # Check if this template has an override + if template in self.overrides: + override_path = self.overrides[template] + if override_path and os.path.isfile(override_path): + with open(override_path, 'r', encoding='utf-8') as f: + source = f.read() + return source, override_path, lambda: True + + # Also check by filename in case the override uses just the filename + template_basename = os.path.basename(template) + if template_basename in self.overrides: + override_path = self.overrides[template_basename] + if override_path and os.path.isfile(override_path): + with open(override_path, 'r', encoding='utf-8') as f: + source = f.read() + return source, override_path, lambda: True + + # Fall back to the include loader which searches all directories + try: + return self.include_loader.get_source(environment, template) + except TemplateNotFound: + raise TemplateNotFound(template) + + @dataclass class RuntimeInfo: date: str @@ -53,12 +108,29 @@ def __init__( self, prompt_dir: str, system_prompt_filename: str = 'system_prompt.j2', + template_overrides: dict[str, str] | None = None, ): + """Initialize the PromptManager. + + Args: + prompt_dir: Default directory containing prompt templates. + system_prompt_filename: Name of the system prompt template file. 
+ template_overrides: Optional dict mapping template names to absolute file paths. + E.g., {'system_prompt.j2': '/path/to/custom/system_prompt.j2'} + """ if prompt_dir is None: raise ValueError('Prompt directory is not set') self.prompt_dir: str = prompt_dir - self.env = Environment(loader=FileSystemLoader(prompt_dir)) + self.template_overrides = template_overrides or {} + + # Use OverrideLoader if there are custom overrides, otherwise use standard loader + if self.template_overrides: + loader = OverrideLoader(prompt_dir, self.template_overrides) + else: + loader = FileSystemLoader(prompt_dir) + + self.env = Environment(loader=loader) self.system_template: Template = self._load_template(system_prompt_filename) self.user_template: Template = self._load_template('user_prompt.j2') self.additional_info_template: Template = self._load_template( diff --git a/tests/runtime/test_opencode_tools.py b/tests/runtime/test_opencode_tools.py new file mode 100644 index 000000000000..9b5ac1f87345 --- /dev/null +++ b/tests/runtime/test_opencode_tools.py @@ -0,0 +1,786 @@ +"""Integration tests for OpenCode-style tools runtime execution. + +These tests verify the runtime handlers for: +- OpenCodeReadAction (opencode_read) +- OpenCodeWriteAction (opencode_write) +- GlobAction (glob) +- GrepAction (grep) +- ListDirAction (list_dir) + +Tests run against the actual runtime (Docker/Local/CLI) to ensure +proper execution of file operations. 
+""" + +import os +import time +from pathlib import Path + +import pytest +from conftest import _close_test_runtime, _load_runtime + +from openhands.core.logger import openhands_logger as logger +from openhands.events.action import ( + CmdRunAction, + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) +from openhands.events.observation import ( + CmdOutputObservation, + ErrorObservation, + FileWriteObservation, +) + + +# ============================================================================== +# Test Fixtures and Helpers +# ============================================================================== + + +def _run_action(runtime, action): + """Execute an action and return the observation.""" + logger.info(action, extra={'msg_type': 'ACTION'}) + obs = runtime.run_action(action) + logger.info(obs, extra={'msg_type': 'OBSERVATION'}) + return obs + + +def _create_test_file(runtime, path: str, content: str): + """Helper to create a test file in the sandbox.""" + action = CmdRunAction( + command=f'mkdir -p "$(dirname "{path}")" && cat > "{path}" << \'TESTEOF\'\n{content}\nTESTEOF' + ) + action.set_hard_timeout(30) + obs = runtime.run_action(action) + assert isinstance(obs, CmdOutputObservation), f"Failed to create file: {obs}" + return obs + + +def _create_test_directory_structure(runtime, base_path: str): + """Create a test directory structure for glob/grep tests.""" + # Create directory structure + files = { + f'{base_path}/src/main.py': 'def main():\n print("Hello")\n\nmain()', + f'{base_path}/src/utils.py': '# Utility functions\ndef helper():\n return 42', + f'{base_path}/src/lib/core.py': 'class Core:\n pass', + f'{base_path}/tests/test_main.py': 'import pytest\n\ndef test_main():\n assert True', + f'{base_path}/tests/test_utils.py': 'def test_helper():\n pass', + f'{base_path}/docs/readme.md': '# Project README\n\nThis is a test project.', + f'{base_path}/config.json': '{"name": "test", "version": "1.0.0"}', + 
f'{base_path}/.gitignore': 'node_modules/\n__pycache__/\n*.pyc', + } + + for path, content in files.items(): + _create_test_file(runtime, path, content) + + return files + + +# ============================================================================== +# OpenCodeReadAction Tests +# ============================================================================== + + +class TestOpenCodeRead: + """Tests for OpenCodeReadAction runtime handler.""" + + def test_read_existing_file(self, temp_dir, runtime_cls, run_as_openhands): + """Test reading an existing file returns correct content with line numbers.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + # Create test file + sandbox_path = config.workspace_mount_path_in_sandbox + test_content = 'line 1\nline 2\nline 3\nline 4\nline 5' + _create_test_file(runtime, f'{sandbox_path}/test.txt', test_content) + + # Read file + action = OpenCodeReadAction(path=f'{sandbox_path}/test.txt') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + assert '' in obs.content + assert '' in obs.content + # Check line number format (5-digit zero-padded with |) + assert '00001| line 1' in obs.content + assert '00002| line 2' in obs.content + assert '00005| line 5' in obs.content + finally: + _close_test_runtime(runtime) + + def test_read_with_offset(self, temp_dir, runtime_cls, run_as_openhands): + """Test reading file with offset parameter.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + test_content = '\n'.join([f'line {i}' for i in range(1, 21)]) + _create_test_file(runtime, f'{sandbox_path}/offset_test.txt', test_content) + + # Read starting from line 10 (0-based offset 9) + action = OpenCodeReadAction(path=f'{sandbox_path}/offset_test.txt', offset=9) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, 
CmdOutputObservation) + # Should start from line 10 (offset 9 + 1 = line 10) + assert '00010| line 10' in obs.content + # Should NOT contain early lines + assert '00001| line 1' not in obs.content + finally: + _close_test_runtime(runtime) + + def test_read_with_limit(self, temp_dir, runtime_cls, run_as_openhands): + """Test reading file with limit parameter.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + test_content = '\n'.join([f'line {i}' for i in range(1, 101)]) + _create_test_file(runtime, f'{sandbox_path}/limit_test.txt', test_content) + + # Read only first 5 lines + action = OpenCodeReadAction(path=f'{sandbox_path}/limit_test.txt', limit=5) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + assert '00001| line 1' in obs.content + assert '00005| line 5' in obs.content + # Should indicate there are more lines + assert 'more lines' in obs.content.lower() or 'offset' in obs.content.lower() + finally: + _close_test_runtime(runtime) + + def test_read_nonexistent_file(self, temp_dir, runtime_cls, run_as_openhands): + """Test reading non-existent file returns proper error with suggestions.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + # Create a similar file for suggestions + _create_test_file(runtime, f'{sandbox_path}/existing_file.py', 'content') + + action = OpenCodeReadAction(path=f'{sandbox_path}/existing_file.txt') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + # Should be error or contain error message + assert isinstance(obs, (ErrorObservation, CmdOutputObservation)) + content = obs.content if hasattr(obs, 'content') else str(obs) + assert 'not found' in content.lower() or 'error' in content.lower() + finally: + _close_test_runtime(runtime) + + def test_read_empty_file(self, 
temp_dir, runtime_cls, run_as_openhands): + """Test reading an empty file.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + # Create empty file + action = CmdRunAction(command=f'touch {sandbox_path}/empty.txt') + action.set_hard_timeout(30) + runtime.run_action(action) + + action = OpenCodeReadAction(path=f'{sandbox_path}/empty.txt') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + assert '' in obs.content + finally: + _close_test_runtime(runtime) + + def test_read_long_lines_truncation(self, temp_dir, runtime_cls, run_as_openhands): + """Test that very long lines are truncated.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + # Create file with very long line (> 2000 chars) + long_line = 'x' * 3000 + _create_test_file(runtime, f'{sandbox_path}/long_line.txt', long_line) + + action = OpenCodeReadAction(path=f'{sandbox_path}/long_line.txt') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Line should be truncated with ... + assert '...' 
in obs.content + finally: + _close_test_runtime(runtime) + + +# ============================================================================== +# OpenCodeWriteAction Tests +# ============================================================================== + + +class TestOpenCodeWrite: + """Tests for OpenCodeWriteAction runtime handler.""" + + def test_write_new_file(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing a new file.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + test_content = 'print("Hello, World!")' + + action = OpenCodeWriteAction( + path=f'{sandbox_path}/new_file.py', + content=test_content + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, FileWriteObservation) + assert 'success' in obs.content.lower() or obs.path.endswith('new_file.py') + + # Verify file exists with correct content + verify = CmdRunAction(command=f'cat {sandbox_path}/new_file.py') + verify.set_hard_timeout(30) + verify_obs = runtime.run_action(verify) + assert test_content in verify_obs.content + finally: + _close_test_runtime(runtime) + + def test_write_with_nested_directory(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing creates parent directories if needed.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + nested_path = f'{sandbox_path}/new/nested/dir/file.txt' + + action = OpenCodeWriteAction( + path=nested_path, + content='nested content' + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, FileWriteObservation) + + # Verify directory was created + verify = CmdRunAction(command=f'test -f {nested_path} && echo "exists"') + verify.set_hard_timeout(30) + verify_obs = runtime.run_action(verify) + assert 'exists' in verify_obs.content + finally: + 
_close_test_runtime(runtime) + + def test_write_overwrite_existing(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing overwrites existing file.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_file(runtime, f'{sandbox_path}/overwrite.txt', 'original content') + + # Overwrite with new content + action = OpenCodeWriteAction( + path=f'{sandbox_path}/overwrite.txt', + content='new content' + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, FileWriteObservation) + + # Verify content was overwritten + verify = CmdRunAction(command=f'cat {sandbox_path}/overwrite.txt') + verify.set_hard_timeout(30) + verify_obs = runtime.run_action(verify) + assert 'new content' in verify_obs.content + assert 'original content' not in verify_obs.content + finally: + _close_test_runtime(runtime) + + def test_write_empty_content(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing empty content creates empty file.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + + action = OpenCodeWriteAction( + path=f'{sandbox_path}/empty_write.txt', + content='' + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, FileWriteObservation) + + # Verify file is empty + verify = CmdRunAction(command=f'wc -c < {sandbox_path}/empty_write.txt') + verify.set_hard_timeout(30) + verify_obs = runtime.run_action(verify) + assert '0' in verify_obs.content.strip() + finally: + _close_test_runtime(runtime) + + def test_write_multiline_content(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing multiline content preserves line breaks.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + multiline = 'line 
1\nline 2\nline 3' + + action = OpenCodeWriteAction( + path=f'{sandbox_path}/multiline.txt', + content=multiline + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, FileWriteObservation) + + # Verify line count + verify = CmdRunAction(command=f'wc -l < {sandbox_path}/multiline.txt') + verify.set_hard_timeout(30) + verify_obs = runtime.run_action(verify) + # Should have 2 newlines (3 lines) + assert int(verify_obs.content.strip()) >= 2 + finally: + _close_test_runtime(runtime) + + def test_write_python_with_linting(self, temp_dir, runtime_cls, run_as_openhands): + """Test writing Python file shows lint errors.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + # Intentionally bad Python (syntax error) + bad_python = 'def foo(\n print("missing paren"' + + action = OpenCodeWriteAction( + path=f'{sandbox_path}/bad.py', + content=bad_python + ) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + # File should still be written + assert isinstance(obs, FileWriteObservation) + # May or may not show diagnostics depending on linter availability + finally: + _close_test_runtime(runtime) + + +# ============================================================================== +# GlobAction Tests +# ============================================================================== + + +class TestGlob: + """Tests for GlobAction runtime handler.""" + + def test_glob_find_python_files(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob finds Python files.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + action = GlobAction(pattern='*.py', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + 
# Should find Python files + assert '.py' in obs.content + # Should find at least some of our test files + content_lower = obs.content.lower() + assert 'main.py' in content_lower or 'utils.py' in content_lower + finally: + _close_test_runtime(runtime) + + def test_glob_recursive_pattern(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob with recursive pattern finds files in subdirectories.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + action = GlobAction(pattern='**/*.py', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find files in subdirectories + assert 'test_' in obs.content.lower() or 'core.py' in obs.content.lower() + finally: + _close_test_runtime(runtime) + + def test_glob_no_matches(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob returns appropriate message when no files match.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + # Create a simple file + _create_test_file(runtime, f'{sandbox_path}/test.txt', 'content') + + action = GlobAction(pattern='*.nonexistent', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should indicate no files found + assert 'no files' in obs.content.lower() or obs.content.strip() == '' + finally: + _close_test_runtime(runtime) + + def test_glob_specific_extension(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob finds only specific file extensions.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + action = 
GlobAction(pattern='*.json', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + assert 'config.json' in obs.content + # Should NOT find Python files + assert 'main.py' not in obs.content + finally: + _close_test_runtime(runtime) + + def test_glob_in_specific_directory(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob searches only in specified directory.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Search only in tests directory + action = GlobAction(pattern='*.py', path=f'{sandbox_path}/tests') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find test files + assert 'test_' in obs.content.lower() + # Should NOT find src files + assert 'main.py' not in obs.content + finally: + _close_test_runtime(runtime) + + +# ============================================================================== +# GrepAction Tests +# ============================================================================== + + +class TestGrep: + """Tests for GrepAction runtime handler.""" + + def test_grep_simple_pattern(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep finds simple pattern.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + action = GrepAction(pattern='def', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find function definitions + assert 'def' in obs.content + # Should show line numbers + assert ':' in obs.content # file:linenum:content format + finally: + _close_test_runtime(runtime) + + 
def test_grep_with_include_filter(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep with file type filter.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Search only in Python files + action = GrepAction(pattern='import', path=sandbox_path, include='*.py') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find imports in Python files + if 'import' in obs.content: + assert '.py' in obs.content + finally: + _close_test_runtime(runtime) + + def test_grep_no_matches(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep returns appropriate message when no matches.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_file(runtime, f'{sandbox_path}/test.txt', 'hello world') + + action = GrepAction(pattern='xyznonexistent123', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should indicate no matches + assert 'no match' in obs.content.lower() or obs.content.strip() == '' + finally: + _close_test_runtime(runtime) + + def test_grep_regex_pattern(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep with regex pattern.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Search for function definitions + action = GrepAction(pattern=r'def \w+\(', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find function definitions + assert 'def' in obs.content + finally: + 
_close_test_runtime(runtime) + + def test_grep_case_sensitive(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep is case-sensitive by default.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_file( + runtime, + f'{sandbox_path}/case_test.txt', + 'Hello\nhello\nHELLO' + ) + + # Search for lowercase 'hello' + action = GrepAction(pattern='hello', path=f'{sandbox_path}/case_test.txt') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should find lowercase + assert 'hello' in obs.content.lower() + finally: + _close_test_runtime(runtime) + + def test_grep_multiline_context(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep shows file:line:content format.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + content = 'line1\nTARGET_PATTERN\nline3' + _create_test_file(runtime, f'{sandbox_path}/grep_test.txt', content) + + action = GrepAction(pattern='TARGET_PATTERN', path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + assert 'TARGET_PATTERN' in obs.content + # Should show line number (line 2) + assert '2' in obs.content or 'grep_test.txt' in obs.content + finally: + _close_test_runtime(runtime) + + +# ============================================================================== +# ListDirAction Tests +# ============================================================================== + + +class TestListDir: + """Tests for ListDirAction runtime handler.""" + + def test_list_dir_basic(self, temp_dir, runtime_cls, run_as_openhands): + """Test basic directory listing.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + 
_create_test_directory_structure(runtime, sandbox_path) + + action = ListDirAction(path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should show directory structure + assert 'src' in obs.content or 'tests' in obs.content or 'docs' in obs.content + finally: + _close_test_runtime(runtime) + + def test_list_dir_default_ignores(self, temp_dir, runtime_cls, run_as_openhands): + """Test default ignore patterns are applied.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + + # Create directories that should be ignored + ignored_dirs = [ + f'{sandbox_path}/node_modules/package/index.js', + f'{sandbox_path}/__pycache__/module.pyc', + f'{sandbox_path}/.git/config', + f'{sandbox_path}/src/main.py', # This should show + ] + for path in ignored_dirs: + _create_test_file(runtime, path, 'content') + + action = ListDirAction(path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Main.py should be visible + # node_modules, __pycache__, .git should be filtered + # (behavior may vary based on tool availability) + finally: + _close_test_runtime(runtime) + + def test_list_dir_with_custom_ignore(self, temp_dir, runtime_cls, run_as_openhands): + """Test custom ignore patterns.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Ignore tests directory + action = ListDirAction(path=sandbox_path, ignore=['tests']) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should show src but potentially not tests + assert 'src' in obs.content or 'main.py' in obs.content + finally: + _close_test_runtime(runtime) + + def 
test_list_dir_empty_directory(self, temp_dir, runtime_cls, run_as_openhands): + """Test listing empty directory.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + + # Create empty directory + empty_dir = f'{sandbox_path}/empty_dir' + create_action = CmdRunAction(command=f'mkdir -p {empty_dir}') + create_action.set_hard_timeout(30) + runtime.run_action(create_action) + + action = ListDirAction(path=empty_dir) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should handle empty directory gracefully + finally: + _close_test_runtime(runtime) + + def test_list_dir_nested_structure(self, temp_dir, runtime_cls, run_as_openhands): + """Test listing shows nested directory structure.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + action = ListDirAction(path=sandbox_path) + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + assert isinstance(obs, CmdOutputObservation) + # Should show directory hierarchy + # The exact format depends on the tool used (tree vs find) + assert len(obs.content) > 0 + finally: + _close_test_runtime(runtime) + + def test_list_dir_nonexistent(self, temp_dir, runtime_cls, run_as_openhands): + """Test listing non-existent directory.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + + action = ListDirAction(path=f'{sandbox_path}/nonexistent_dir_12345') + action.set_hard_timeout(30) + obs = _run_action(runtime, action) + + # Should handle gracefully (either error or empty output) + assert isinstance(obs, (CmdOutputObservation, ErrorObservation)) + finally: + _close_test_runtime(runtime) + + +# 
============================================================================== +# Combined Workflow Tests +# ============================================================================== + + +class TestCombinedWorkflows: + """Tests for combined tool workflows.""" + + def test_write_then_read(self, temp_dir, runtime_cls, run_as_openhands): + """Test write followed by read returns correct content.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + test_content = 'def hello():\n print("Hello!")\n\nhello()' + + # Write file + write_action = OpenCodeWriteAction( + path=f'{sandbox_path}/workflow_test.py', + content=test_content + ) + write_action.set_hard_timeout(30) + write_obs = _run_action(runtime, write_action) + assert isinstance(write_obs, FileWriteObservation) + + # Read file back + read_action = OpenCodeReadAction(path=f'{sandbox_path}/workflow_test.py') + read_action.set_hard_timeout(30) + read_obs = _run_action(runtime, read_action) + + assert isinstance(read_obs, CmdOutputObservation) + assert 'def hello()' in read_obs.content + assert 'print' in read_obs.content + finally: + _close_test_runtime(runtime) + + def test_glob_then_read_multiple(self, temp_dir, runtime_cls, run_as_openhands): + """Test glob to find files, then read them.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Find Python files + glob_action = GlobAction(pattern='*.py', path=f'{sandbox_path}/src') + glob_action.set_hard_timeout(30) + glob_obs = _run_action(runtime, glob_action) + + assert isinstance(glob_obs, CmdOutputObservation) + assert '.py' in glob_obs.content + finally: + _close_test_runtime(runtime) + + def test_grep_to_find_then_read(self, temp_dir, runtime_cls, run_as_openhands): + """Test grep to find pattern, then read matching 
file.""" + runtime, config = _load_runtime(temp_dir, runtime_cls, run_as_openhands) + try: + sandbox_path = config.workspace_mount_path_in_sandbox + _create_test_directory_structure(runtime, sandbox_path) + + # Find files with 'class' keyword + grep_action = GrepAction(pattern='class', path=sandbox_path) + grep_action.set_hard_timeout(30) + grep_obs = _run_action(runtime, grep_action) + + assert isinstance(grep_obs, CmdOutputObservation) + # Should find Core class + if 'class' in grep_obs.content: + assert 'Core' in grep_obs.content + finally: + _close_test_runtime(runtime) + diff --git a/tests/unit/agenthub/test_opencode_action_serialization.py b/tests/unit/agenthub/test_opencode_action_serialization.py new file mode 100644 index 000000000000..fb992abcc1b0 --- /dev/null +++ b/tests/unit/agenthub/test_opencode_action_serialization.py @@ -0,0 +1,430 @@ +"""Unit tests for OpenCode action serialization and deserialization. + +Tests that OpenCode actions can be properly serialized to dict and +deserialized back to action objects for network transport. 
+""" + +import pytest + +from openhands.core.schema import ActionType +from openhands.events.action import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) +from openhands.events.serialization import event_from_dict, event_to_dict + + +# ============================================================================== +# OpenCodeReadAction Serialization Tests +# ============================================================================== + + +class TestOpenCodeReadActionSerialization: + """Tests for OpenCodeReadAction serialization.""" + + def test_serialize_basic(self): + """Test basic serialization.""" + action = OpenCodeReadAction(path='/test/file.py') + serialized = event_to_dict(action) + + assert serialized['action'] == ActionType.OPENCODE_READ + assert serialized['args']['path'] == '/test/file.py' + assert serialized['args']['offset'] == 0 + assert serialized['args']['limit'] == 2000 + + def test_serialize_with_params(self): + """Test serialization with all parameters.""" + action = OpenCodeReadAction( + path='/test/file.py', + offset=100, + limit=500, + thought='Reading file to understand implementation' + ) + serialized = event_to_dict(action) + + assert serialized['args']['path'] == '/test/file.py' + assert serialized['args']['offset'] == 100 + assert serialized['args']['limit'] == 500 + assert serialized['args']['thought'] == 'Reading file to understand implementation' + + def test_deserialize_basic(self): + """Test basic deserialization.""" + data = { + 'id': 1, + 'action': ActionType.OPENCODE_READ, + 'args': { + 'path': '/test/file.py', + 'offset': 0, + 'limit': 2000, + 'thought': '', + } + } + action = event_from_dict(data) + + assert isinstance(action, OpenCodeReadAction) + assert action.path == '/test/file.py' + assert action.offset == 0 + assert action.limit == 2000 + + def test_roundtrip(self): + """Test serialization roundtrip.""" + original = OpenCodeReadAction( + path='/path/to/file.txt', + 
offset=50, + limit=100, + thought='Test thought' + ) + + serialized = event_to_dict(original) + deserialized = event_from_dict(serialized) + + assert isinstance(deserialized, OpenCodeReadAction) + assert deserialized.path == original.path + assert deserialized.offset == original.offset + assert deserialized.limit == original.limit + assert deserialized.thought == original.thought + + +# ============================================================================== +# OpenCodeWriteAction Serialization Tests +# ============================================================================== + + +class TestOpenCodeWriteActionSerialization: + """Tests for OpenCodeWriteAction serialization.""" + + def test_serialize_basic(self): + """Test basic serialization.""" + action = OpenCodeWriteAction( + path='/test/file.py', + content='print("hello")' + ) + serialized = event_to_dict(action) + + assert serialized['action'] == ActionType.OPENCODE_WRITE + assert serialized['args']['path'] == '/test/file.py' + assert serialized['args']['content'] == 'print("hello")' + + def test_serialize_multiline_content(self): + """Test serialization with multiline content.""" + content = 'def foo():\n return 42\n\nfoo()' + action = OpenCodeWriteAction( + path='/test/file.py', + content=content + ) + serialized = event_to_dict(action) + + assert serialized['args']['content'] == content + + def test_serialize_special_characters(self): + """Test serialization with special characters.""" + content = 'print("Special: \\n\\t\\"quotes\\"")' + action = OpenCodeWriteAction( + path='/test/file.py', + content=content + ) + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.content == content + + def test_deserialize_basic(self): + """Test basic deserialization.""" + data = { + 'id': 1, + 'action': ActionType.OPENCODE_WRITE, + 'args': { + 'path': '/test/file.py', + 'content': 'test content', + 'thought': '', + } + } + action = event_from_dict(data) + + 
assert isinstance(action, OpenCodeWriteAction) + assert action.path == '/test/file.py' + assert action.content == 'test content' + + def test_roundtrip(self): + """Test serialization roundtrip.""" + original = OpenCodeWriteAction( + path='/path/to/file.py', + content='def test():\n pass', + thought='Creating test file' + ) + + serialized = event_to_dict(original) + deserialized = event_from_dict(serialized) + + assert isinstance(deserialized, OpenCodeWriteAction) + assert deserialized.path == original.path + assert deserialized.content == original.content + assert deserialized.thought == original.thought + + +# ============================================================================== +# GlobAction Serialization Tests +# ============================================================================== + + +class TestGlobActionSerialization: + """Tests for GlobAction serialization.""" + + def test_serialize_basic(self): + """Test basic serialization.""" + action = GlobAction(pattern='*.py') + serialized = event_to_dict(action) + + assert serialized['action'] == ActionType.GLOB + assert serialized['args']['pattern'] == '*.py' + assert serialized['args']['path'] == '.' 
+ + def test_serialize_with_path(self): + """Test serialization with path.""" + action = GlobAction(pattern='**/*.ts', path='/project/src') + serialized = event_to_dict(action) + + assert serialized['args']['pattern'] == '**/*.ts' + assert serialized['args']['path'] == '/project/src' + + def test_deserialize_basic(self): + """Test basic deserialization.""" + data = { + 'id': 1, + 'action': ActionType.GLOB, + 'args': { + 'pattern': '*.js', + 'path': '/app', + 'thought': '', + } + } + action = event_from_dict(data) + + assert isinstance(action, GlobAction) + assert action.pattern == '*.js' + assert action.path == '/app' + + def test_roundtrip(self): + """Test serialization roundtrip.""" + original = GlobAction( + pattern='**/*.{js,ts}', + path='/workspace', + thought='Finding all JS/TS files' + ) + + serialized = event_to_dict(original) + deserialized = event_from_dict(serialized) + + assert isinstance(deserialized, GlobAction) + assert deserialized.pattern == original.pattern + assert deserialized.path == original.path + + +# ============================================================================== +# GrepAction Serialization Tests +# ============================================================================== + + +class TestGrepActionSerialization: + """Tests for GrepAction serialization.""" + + def test_serialize_basic(self): + """Test basic serialization.""" + action = GrepAction(pattern='TODO') + serialized = event_to_dict(action) + + assert serialized['action'] == ActionType.GREP + assert serialized['args']['pattern'] == 'TODO' + assert serialized['args']['path'] == '.' 
+ assert serialized['args']['include'] == '' + + def test_serialize_with_all_params(self): + """Test serialization with all parameters.""" + action = GrepAction( + pattern='class.*Handler', + path='/src', + include='*.py' + ) + serialized = event_to_dict(action) + + assert serialized['args']['pattern'] == 'class.*Handler' + assert serialized['args']['path'] == '/src' + assert serialized['args']['include'] == '*.py' + + def test_deserialize_basic(self): + """Test basic deserialization.""" + data = { + 'id': 1, + 'action': ActionType.GREP, + 'args': { + 'pattern': 'import', + 'path': '/lib', + 'include': '*.ts', + 'thought': '', + } + } + action = event_from_dict(data) + + assert isinstance(action, GrepAction) + assert action.pattern == 'import' + assert action.path == '/lib' + assert action.include == '*.ts' + + def test_roundtrip(self): + """Test serialization roundtrip.""" + original = GrepAction( + pattern=r'def \w+\(', + path='/project', + include='*.py', + thought='Finding function definitions' + ) + + serialized = event_to_dict(original) + deserialized = event_from_dict(serialized) + + assert isinstance(deserialized, GrepAction) + assert deserialized.pattern == original.pattern + assert deserialized.path == original.path + assert deserialized.include == original.include + + +# ============================================================================== +# ListDirAction Serialization Tests +# ============================================================================== + + +class TestListDirActionSerialization: + """Tests for ListDirAction serialization.""" + + def test_serialize_basic(self): + """Test basic serialization.""" + action = ListDirAction() + serialized = event_to_dict(action) + + assert serialized['action'] == ActionType.LIST_DIR + assert serialized['args']['path'] == '.' 
+ assert serialized['args']['ignore'] == [] + + def test_serialize_with_params(self): + """Test serialization with parameters.""" + action = ListDirAction( + path='/project', + ignore=['*.log', 'tmp', '.cache'] + ) + serialized = event_to_dict(action) + + assert serialized['args']['path'] == '/project' + assert serialized['args']['ignore'] == ['*.log', 'tmp', '.cache'] + + def test_deserialize_basic(self): + """Test basic deserialization.""" + data = { + 'id': 1, + 'action': ActionType.LIST_DIR, + 'args': { + 'path': '/workspace', + 'ignore': ['node_modules'], + 'thought': '', + } + } + action = event_from_dict(data) + + assert isinstance(action, ListDirAction) + assert action.path == '/workspace' + assert action.ignore == ['node_modules'] + + def test_roundtrip(self): + """Test serialization roundtrip.""" + original = ListDirAction( + path='/app', + ignore=['dist', 'build'], + thought='Listing project files' + ) + + serialized = event_to_dict(original) + deserialized = event_from_dict(serialized) + + assert isinstance(deserialized, ListDirAction) + assert deserialized.path == original.path + assert deserialized.ignore == original.ignore + + def test_all_ignores_not_serialized(self): + """Test that all_ignores property is not serialized (it's computed).""" + action = ListDirAction(ignore=['custom']) + serialized = event_to_dict(action) + + # all_ignores should not be in serialized args + assert 'all_ignores' not in serialized['args'] + # But ignore should be + assert serialized['args']['ignore'] == ['custom'] + + +# ============================================================================== +# Edge Cases +# ============================================================================== + + +class TestSerializationEdgeCases: + """Tests for serialization edge cases.""" + + def test_unicode_in_path(self): + """Test Unicode characters in paths.""" + action = OpenCodeReadAction(path='/путь/文件/αρχείο.py') + serialized = event_to_dict(action) + deserialized = 
event_from_dict(serialized) + + assert deserialized.path == '/путь/文件/αρχείο.py' + + def test_unicode_in_content(self): + """Test Unicode characters in content.""" + content = '# 日本語コメント\nprint("你好世界")' + action = OpenCodeWriteAction(path='/test.py', content=content) + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.content == content + + def test_empty_string_handling(self): + """Test empty strings are preserved.""" + action = OpenCodeWriteAction(path='/empty.txt', content='') + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.content == '' + + def test_newlines_preserved(self): + """Test newlines in content are preserved.""" + content = 'line1\nline2\r\nline3\n' + action = OpenCodeWriteAction(path='/test.txt', content=content) + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.content == content + + def test_regex_pattern_preserved(self): + """Test regex patterns are preserved correctly.""" + pattern = r'^import\s+(\w+)\s+from\s+["\'](.+)["\']' + action = GrepAction(pattern=pattern) + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.pattern == pattern + + def test_special_glob_patterns(self): + """Test special glob patterns are preserved.""" + patterns = [ + '*.{js,ts,jsx,tsx}', + '[!_]*.py', + '**/*[0-9]*.log', + '**/test_*.py', + ] + for pattern in patterns: + action = GlobAction(pattern=pattern) + serialized = event_to_dict(action) + deserialized = event_from_dict(serialized) + + assert deserialized.pattern == pattern, f"Failed for pattern: {pattern}" + diff --git a/tests/unit/agenthub/test_opencode_handlers.py b/tests/unit/agenthub/test_opencode_handlers.py new file mode 100644 index 000000000000..42139ae70f80 --- /dev/null +++ b/tests/unit/agenthub/test_opencode_handlers.py @@ -0,0 +1,615 @@ +"""Unit tests for OpenCode runtime handler logic. 
+ +These tests verify the handler logic used in action_execution_server.py +by testing the individual components in isolation. +""" + +import os +import tempfile + +import pytest + +from openhands.events.action import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) + + +# ============================================================================== +# OpenCodeRead Handler Tests +# ============================================================================== + + +class TestOpenCodeReadHandler: + """Tests for opencode_read handler logic.""" + + def test_read_formats_line_numbers_correctly(self): + """Test that line numbers are formatted as 5-digit zero-padded with | separator.""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write('line 1\nline 2\nline 3') + temp_path = f.name + + try: + # Simulate reading - test the formatting logic + with open(temp_path, 'r') as f: + lines = f.read().split('\n') + + # Format as OpenCode does + formatted = [f"{str(i + 1).zfill(5)}| {line}" for i, line in enumerate(lines)] + + assert formatted[0] == '00001| line 1' + assert formatted[1] == '00002| line 2' + assert formatted[2] == '00003| line 3' + finally: + os.unlink(temp_path) + + def test_read_respects_offset(self): + """Test that offset parameter works correctly.""" + lines = [f'line {i}' for i in range(1, 101)] + offset = 49 # Start from line 50 (0-indexed) + limit = 10 + + result = lines[offset:offset + limit] + + assert result[0] == 'line 50' + assert len(result) == 10 + + def test_read_respects_limit(self): + """Test that limit parameter works correctly.""" + lines = [f'line {i}' for i in range(1, 101)] + offset = 0 + limit = 5 + + result = lines[offset:offset + limit] + + assert len(result) == 5 + assert result[-1] == 'line 5' + + def test_read_truncates_long_lines(self): + """Test that lines longer than MAX_LINE_LENGTH are truncated.""" + MAX_LINE_LENGTH = 2000 + long_line = 'x' * 3000 + 
+ if len(long_line) > MAX_LINE_LENGTH: + truncated = long_line[:MAX_LINE_LENGTH] + '...' + else: + truncated = long_line + + assert len(truncated) == MAX_LINE_LENGTH + 3 # +3 for '...' + assert truncated.endswith('...') + + def test_binary_extension_detection(self): + """Test binary file detection by extension.""" + BINARY_EXTENSIONS = { + '.zip', '.tar', '.gz', '.exe', '.dll', '.so', '.class', '.jar', + '.war', '.7z', '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', + '.bin', '.dat', '.obj', '.o', '.a', '.lib', '.wasm', '.pyc', '.pyo' + } + + # Test that binary extensions are detected + assert os.path.splitext('/path/file.zip')[1].lower() in BINARY_EXTENSIONS + assert os.path.splitext('/path/file.pyc')[1].lower() in BINARY_EXTENSIONS + + # Test that text extensions are not detected + assert os.path.splitext('/path/file.py')[1].lower() not in BINARY_EXTENSIONS + assert os.path.splitext('/path/file.txt')[1].lower() not in BINARY_EXTENSIONS + + def test_binary_content_detection(self): + """Test binary file detection by content analysis.""" + # Text content + text_content = b'def hello():\n print("hi")\n' + has_null = b'\x00' in text_content + assert not has_null + + # Binary content + binary_content = b'\x00\x01\x02\x03\x04\x05' + has_null = b'\x00' in binary_content + assert has_null + + def test_byte_limit_calculation(self): + """Test that byte limit calculation works correctly.""" + MAX_BYTES = 50 * 1024 # 50KB + lines = ['x' * 100 for _ in range(1000)] # 1000 lines of 100 chars each + + total_bytes = 0 + read_lines = [] + for line in lines: + line_bytes = len(line.encode('utf-8')) + 1 # +1 for newline + if total_bytes + line_bytes > MAX_BYTES: + break + read_lines.append(line) + total_bytes += line_bytes + + assert total_bytes <= MAX_BYTES + # With 100 chars per line + 1 newline = 101 bytes + # 50KB / 101 bytes ≈ 507 lines max + assert len(read_lines) < 520 + + +# ============================================================================== +# OpenCodeWrite 
Handler Tests +# ============================================================================== + + +class TestOpenCodeWriteHandler: + """Tests for opencode_write handler logic.""" + + def test_write_creates_file(self): + """Test that write creates file with correct content.""" + with tempfile.TemporaryDirectory() as temp_dir: + filepath = os.path.join(temp_dir, 'test.txt') + content = 'test content' + + with open(filepath, 'w') as f: + f.write(content) + + with open(filepath, 'r') as f: + assert f.read() == content + + def test_write_creates_parent_directories(self): + """Test that write creates parent directories if needed.""" + with tempfile.TemporaryDirectory() as temp_dir: + nested_path = os.path.join(temp_dir, 'a', 'b', 'c', 'file.txt') + + os.makedirs(os.path.dirname(nested_path), exist_ok=True) + with open(nested_path, 'w') as f: + f.write('content') + + assert os.path.exists(nested_path) + + def test_write_overwrites_existing(self): + """Test that write overwrites existing file content.""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f: + f.write('original') + temp_path = f.name + + try: + with open(temp_path, 'w') as f: + f.write('new content') + + with open(temp_path, 'r') as f: + assert f.read() == 'new content' + finally: + os.unlink(temp_path) + + def test_linter_detection_by_extension(self): + """Test that correct linter is selected based on file extension.""" + extension_to_linter = { + '.py': ['flake8', 'pylint', 'py_compile'], + '.js': ['eslint'], + '.jsx': ['eslint'], + '.ts': ['eslint'], + '.tsx': ['eslint'], + '.go': ['go vet'], + '.rs': ['cargo check'], + } + + for ext, linters in extension_to_linter.items(): + assert ext in extension_to_linter + assert len(linters) > 0 + + def test_write_preserves_unicode(self): + """Test that Unicode content is preserved.""" + with tempfile.TemporaryDirectory() as temp_dir: + filepath = os.path.join(temp_dir, 'unicode.txt') + content = '你好世界\nこんにちは\n🎉' + + with open(filepath, 
'w', encoding='utf-8') as f:
+                f.write(content)
+
+            with open(filepath, 'r', encoding='utf-8') as f:
+                assert f.read() == content
+
+
+# ==============================================================================
+# Glob Handler Tests
+# ==============================================================================
+
+
+class TestGlobHandler:
+    """Tests for glob handler logic."""
+
+    def test_glob_pattern_matching(self):
+        """Test glob pattern matching logic."""
+        import fnmatch
+
+        files = [
+            'main.py',
+            'utils.py',
+            'test.js',
+            'config.json',
+            'src/lib.py',
+        ]
+
+        # Test *.py pattern
+        py_files = [f for f in files if fnmatch.fnmatch(f, '*.py')]
+        assert 'main.py' in py_files
+        assert 'utils.py' in py_files
+        assert 'test.js' not in py_files
+
+        # Note: fnmatch doesn't handle ** recursion, that's handled by find/rg
+
+    def test_glob_result_limit(self):
+        """Test that glob results are limited to 100."""
+        limit = 100
+        files = [f'file{i}.txt' for i in range(200)]
+
+        limited = files[:limit]
+        assert len(limited) == 100
+
+    def test_glob_various_patterns(self):
+        """Test various glob patterns work correctly."""
+        import fnmatch
+
+        test_cases = [
+            ('*.py', 'test.py', True),
+            ('*.py', 'test.js', False),
+            ('*.{py,js}', 'test.py', False),  # fnmatch doesn't support {,}
+            ('test_*.py', 'test_main.py', True),
+            ('test_*.py', 'main_test.py', False),
+            ('[!_]*.py', 'main.py', True),
+            ('[!_]*.py', '_private.py', False),
+        ]
+
+        for pattern, filename, expected in test_cases:
+            if '{' not in pattern:  # fnmatch doesn't support brace expansion
+                result = fnmatch.fnmatch(filename, pattern)
+                assert result == expected, f"Pattern {pattern} vs {filename}"
+
+
+# ==============================================================================
+# Grep Handler Tests
+# ==============================================================================
+
+
+class TestGrepHandler:
+    """Tests for grep handler logic."""
+
+    def test_grep_pattern_matching(self):
+        """Test grep pattern 
matching.""" + import re + + content = ''' +def foo(): + pass + +def bar(): + return 42 + +class MyClass: + pass +''' + + # Find function definitions + pattern = r'def \w+\(' + matches = re.findall(pattern, content) + assert len(matches) == 2 + assert 'def foo(' in matches + assert 'def bar(' in matches + + def test_grep_result_limit(self): + """Test that grep results are limited to 100.""" + limit = 100 + lines = [f'match_{i}' for i in range(200)] + + limited = lines[:limit] + assert len(limited) == 100 + + def test_grep_include_filter_logic(self): + """Test include filter pattern matching.""" + import fnmatch + + files = [ + 'main.py', + 'utils.py', + 'test.js', + 'config.json', + ] + + include_pattern = '*.py' + filtered = [f for f in files if fnmatch.fnmatch(f, include_pattern)] + + assert 'main.py' in filtered + assert 'test.js' not in filtered + + def test_grep_regex_patterns(self): + """Test various regex patterns.""" + import re + + content = 'import os\nfrom pathlib import Path\nimport sys' + + patterns_expected = [ + (r'^import', 2), # Lines starting with 'import' + (r'import \w+', 3), # 'import' followed by word + (r'from \w+ import', 1), # 'from X import' pattern + ] + + for pattern, expected_count in patterns_expected: + matches = re.findall(pattern, content, re.MULTILINE) + assert len(matches) == expected_count, f"Pattern {pattern}" + + +# ============================================================================== +# ListDir Handler Tests +# ============================================================================== + + +class TestListDirHandler: + """Tests for list_dir handler logic.""" + + def test_default_ignore_patterns(self): + """Test that default ignore patterns are correct.""" + default_ignores = [ + 'node_modules', + '__pycache__', + '.git', + 'dist', + 'build', + 'target', + 'vendor', + '.venv', + 'venv', + '.cache', + ] + + action = ListDirAction() + assert action.DEFAULT_IGNORES == default_ignores + + def 
test_combined_ignore_patterns(self): + """Test that custom ignores are combined with defaults.""" + custom = ['*.log', 'tmp'] + action = ListDirAction(ignore=custom) + + all_ignores = action.all_ignores + + # Should contain defaults + assert 'node_modules' in all_ignores + assert '__pycache__' in all_ignores + + # Should contain custom + assert '*.log' in all_ignores + assert 'tmp' in all_ignores + + def test_tree_structure_building(self): + """Test tree structure building logic.""" + files = [ + 'src/main.py', + 'src/utils.py', + 'src/lib/core.py', + 'tests/test_main.py', + ] + + dirs = set() + files_by_dir = {} + + for f in files: + d = os.path.dirname(f) or '.' + parts = d.split(os.sep) if d != '.' else [] + + # Add all parent directories + for i in range(len(parts) + 1): + dir_p = os.sep.join(parts[:i]) if i > 0 else '.' + dirs.add(dir_p) + + if d not in files_by_dir: + files_by_dir[d] = [] + files_by_dir[d].append(os.path.basename(f)) + + assert '.' in dirs + assert 'src' in dirs + assert 'src/lib' in dirs + assert 'tests' in dirs + + assert 'main.py' in files_by_dir['src'] + assert 'core.py' in files_by_dir['src/lib'] + + def test_ignore_pattern_filtering(self): + """Test that ignore patterns filter correctly.""" + files = [ + 'src/main.py', + 'node_modules/package/index.js', + '__pycache__/module.pyc', + '.git/config', + 'dist/bundle.js', + ] + + ignores = ['node_modules', '__pycache__', '.git', 'dist'] + + filtered = [ + f for f in files + if not any(ignore in f for ignore in ignores) + ] + + assert 'src/main.py' in filtered + assert len(filtered) == 1 + + +# ============================================================================== +# Action Properties Tests +# ============================================================================== + + +class TestActionProperties: + """Tests for action class properties.""" + + def test_opencode_read_message(self): + """Test OpenCodeReadAction message property.""" + action = OpenCodeReadAction(path='/test.py') + 
assert action.message == 'Reading file: /test.py' + + action_with_offset = OpenCodeReadAction(path='/test.py', offset=50) + assert 'from line 51' in action_with_offset.message + + def test_opencode_write_message(self): + """Test OpenCodeWriteAction message property.""" + action = OpenCodeWriteAction(path='/test.py', content='') + assert action.message == 'Writing file: /test.py' + + def test_glob_message(self): + """Test GlobAction message property.""" + action = GlobAction(pattern='**/*.py') + assert '**/*.py' in action.message + + def test_grep_message(self): + """Test GrepAction message property.""" + action = GrepAction(pattern='TODO') + assert 'TODO' in action.message + + def test_list_dir_message(self): + """Test ListDirAction message property.""" + action = ListDirAction(path='/project') + assert '/project' in action.message + + action_default = ListDirAction() + # Message contains the path, which is '.' + assert '.' in action_default.message or 'directory' in action_default.message + + def test_actions_are_runnable(self): + """Test that all OpenCode actions are runnable.""" + actions = [ + OpenCodeReadAction(path='/test.py'), + OpenCodeWriteAction(path='/test.py', content=''), + GlobAction(pattern='*.py'), + GrepAction(pattern='TODO'), + ListDirAction(), + ] + + for action in actions: + assert action.runnable is True + + +# ============================================================================== +# File Suggestions Tests +# ============================================================================== + + +class TestFileSuggestions: + """Tests for file not found suggestions logic.""" + + def test_find_similar_files(self): + """Test finding similar files for suggestions.""" + directory_contents = [ + 'test_file.py', + 'test_file.txt', + 'test_main.py', + 'other_file.py', + 'config.json', + ] + + basename = 'test_file' + + suggestions = [ + entry for entry in directory_contents + if basename.lower() in entry.lower() or entry.lower() in basename.lower() 
+ ][:3] + + assert 'test_file.py' in suggestions + assert 'test_file.txt' in suggestions + assert len(suggestions) <= 3 + + def test_case_insensitive_matching(self): + """Test that file suggestions are case-insensitive.""" + directory_contents = [ + 'TestFile.py', + 'TESTFILE.txt', + 'testfile.js', + ] + + basename = 'testfile' + + suggestions = [ + entry for entry in directory_contents + if basename.lower() in entry.lower() + ] + + assert len(suggestions) == 3 + + +# ============================================================================== +# Subprocess Command Building Tests +# ============================================================================== + + +class TestSubprocessCommandBuilding: + """Tests for building subprocess commands.""" + + def test_ripgrep_glob_command(self): + """Test ripgrep command for glob.""" + pattern = '*.py' + search_path = '/project' + + cmd = ['rg', '--files', '-g', pattern, '--sortr', 'modified', search_path] + + assert cmd[0] == 'rg' + assert '-g' in cmd + assert pattern in cmd + assert '--sortr' in cmd + + def test_find_glob_fallback_command(self): + """Test find command for glob fallback.""" + pattern = '*.py' + search_path = '/project' + + # Note: This is a simplified version + cmd = f'find {search_path} -type f -name "{pattern}"' + + assert 'find' in cmd + assert search_path in cmd + assert pattern in cmd + + def test_ripgrep_grep_command(self): + """Test ripgrep command for grep.""" + pattern = 'TODO' + search_path = '/project' + + cmd = ['rg', '-n', pattern, search_path] + + assert cmd[0] == 'rg' + assert '-n' in cmd # Line numbers + assert pattern in cmd + + def test_grep_fallback_command(self): + """Test grep command for fallback.""" + pattern = 'TODO' + search_path = '/project' + + cmd = f'grep -rn "{pattern}" {search_path}' + + assert 'grep' in cmd + assert '-rn' in cmd # Recursive with line numbers + assert pattern in cmd + + def test_tree_command(self): + """Test tree command for list_dir.""" + path = 
'/project' + ignores = ['node_modules', '__pycache__'] + + cmd = ['tree', '-L', '3', '--noreport'] + for ignore in ignores: + cmd.extend(['-I', ignore]) + cmd.append(path) + + assert 'tree' in cmd + assert '-L' in cmd + assert '3' in cmd # Depth limit + + def test_grep_with_include_command(self): + """Test grep command with include filter.""" + pattern = 'import' + search_path = '/project' + include = '*.py' + + # Ripgrep version + rg_cmd = ['rg', '-n', '-g', include, pattern, search_path] + assert '-g' in rg_cmd + assert include in rg_cmd + + # Find+grep fallback + find_grep = f'find {search_path} -type f -name "{include}" -exec grep -Hn "{pattern}" {{}} \\;' + assert 'find' in find_grep + assert '-name' in find_grep + assert include in find_grep + diff --git a/tests/unit/agenthub/test_opencode_runtime_handlers.py b/tests/unit/agenthub/test_opencode_runtime_handlers.py new file mode 100644 index 000000000000..51aa3f011a2f --- /dev/null +++ b/tests/unit/agenthub/test_opencode_runtime_handlers.py @@ -0,0 +1,892 @@ +"""Standalone unit tests for actual OpenCode runtime handler implementations. + +These tests directly test the handler methods in action_execution_server.py +using real file operations on temporary directories, without requiring Docker. 
+""" + +import asyncio +import os +import subprocess +import tempfile +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from openhands.events.action import ( + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) +from openhands.events.observation import ( + CmdOutputObservation, + ErrorObservation, + FileWriteObservation, +) + + +# ============================================================================== +# Test Fixtures +# ============================================================================== + + +@pytest.fixture +def temp_workspace(): + """Create a temporary workspace directory for testing.""" + with tempfile.TemporaryDirectory() as tmpdir: + yield tmpdir + + +@pytest.fixture +def mock_executor(temp_workspace): + """Create a minimal mock ActionExecutor with real file system access.""" + # Import here to avoid circular imports + from openhands.runtime.action_execution_server import ActionExecutor + + # Create a mock that has the necessary attributes + executor = MagicMock(spec=ActionExecutor) + executor._initial_cwd = temp_workspace + executor.bash_session = MagicMock() + executor.bash_session.cwd = temp_workspace + executor.lock = asyncio.Lock() + + # Use the real _resolve_path logic + def resolve_path(path, working_dir): + if os.path.isabs(path): + return path + return os.path.join(working_dir, path) + + executor._resolve_path = resolve_path + + return executor + + +def create_test_file(directory: str, filename: str, content: str) -> str: + """Helper to create a test file.""" + filepath = os.path.join(directory, filename) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + return filepath + + +def create_test_structure(base_dir: str) -> dict: + """Create a test directory structure and return file paths.""" + files = { + 'main.py': 'def main():\n print("Hello")\n\nif __name__ == "__main__":\n main()', + 
'utils.py': '# Utility functions\n\ndef helper():\n return 42\n\ndef another():\n pass', + 'src/core.py': 'class Core:\n def __init__(self):\n self.value = 0\n\n def run(self):\n pass', + 'src/lib/helpers.py': 'import os\n\ndef get_path():\n return os.getcwd()', + 'tests/test_main.py': 'import pytest\n\ndef test_main():\n assert True\n\ndef test_another():\n pass', + 'config.json': '{"name": "test", "version": "1.0.0"}', + 'README.md': '# Test Project\n\nThis is a test project.', + '.gitignore': 'node_modules/\n__pycache__/\n*.pyc', + } + + paths = {} + for rel_path, content in files.items(): + full_path = os.path.join(base_dir, rel_path) + os.makedirs(os.path.dirname(full_path), exist_ok=True) + with open(full_path, 'w', encoding='utf-8') as f: + f.write(content) + paths[rel_path] = full_path + + return paths + + +# ============================================================================== +# OpenCodeRead Handler Tests - Direct Implementation +# ============================================================================== + + +class TestOpenCodeReadHandlerImpl: + """Tests for the actual opencode_read handler implementation logic.""" + + def test_read_file_line_number_format(self, temp_workspace): + """Test that files are read with correct 5-digit line number format.""" + content = 'line 1\nline 2\nline 3\nline 4\nline 5' + filepath = create_test_file(temp_workspace, 'test.txt', content) + + # Simulate the handler logic + with open(filepath, 'r', encoding='utf-8', errors='replace') as f: + lines = f.read().split('\n') + + offset = 0 + limit = 2000 + raw = lines[offset:offset + limit] + + # Format with 5-digit line numbers (OpenCode style) + formatted = [f"{str(i + offset + 1).zfill(5)}| {line}" for i, line in enumerate(raw)] + + assert formatted[0] == '00001| line 1' + assert formatted[1] == '00002| line 2' + assert formatted[4] == '00005| line 5' + + def test_read_file_with_offset(self, temp_workspace): + """Test reading file starting from offset.""" + 
content = '\n'.join([f'line {i}' for i in range(1, 101)]) + filepath = create_test_file(temp_workspace, 'offset_test.txt', content) + + with open(filepath, 'r', encoding='utf-8') as f: + lines = f.read().split('\n') + + offset = 49 # Start from line 50 + limit = 10 + raw = lines[offset:offset + limit] + formatted = [f"{str(i + offset + 1).zfill(5)}| {line}" for i, line in enumerate(raw)] + + assert formatted[0] == '00050| line 50' + assert len(formatted) == 10 + + def test_read_file_with_limit(self, temp_workspace): + """Test reading file with line limit.""" + content = '\n'.join([f'line {i}' for i in range(1, 101)]) + filepath = create_test_file(temp_workspace, 'limit_test.txt', content) + + with open(filepath, 'r', encoding='utf-8') as f: + lines = f.read().split('\n') + + offset = 0 + limit = 5 + raw = lines[offset:offset + limit] + + assert len(raw) == 5 + assert raw[0] == 'line 1' + assert raw[4] == 'line 5' + + def test_read_file_long_line_truncation(self, temp_workspace): + """Test that long lines are truncated with ...""" + MAX_LINE_LENGTH = 2000 + long_line = 'x' * 3000 + filepath = create_test_file(temp_workspace, 'long_line.txt', long_line) + + with open(filepath, 'r', encoding='utf-8') as f: + lines = f.read().split('\n') + + # Apply truncation logic + processed = [] + for line in lines: + if len(line) > MAX_LINE_LENGTH: + line = line[:MAX_LINE_LENGTH] + '...' 
+ processed.append(line) + + assert processed[0].endswith('...') + assert len(processed[0]) == MAX_LINE_LENGTH + 3 + + def test_read_file_byte_limit(self, temp_workspace): + """Test that output is truncated at byte limit.""" + MAX_BYTES = 50 * 1024 # 50KB + # Create file larger than 50KB + lines = ['x' * 100 for _ in range(1000)] # ~100KB + content = '\n'.join(lines) + filepath = create_test_file(temp_workspace, 'large_file.txt', content) + + with open(filepath, 'r', encoding='utf-8') as f: + file_lines = f.read().split('\n') + + # Apply byte limit logic + total_bytes = 0 + result = [] + truncated = False + + for line in file_lines: + line_bytes = len(line.encode('utf-8')) + 1 + if total_bytes + line_bytes > MAX_BYTES: + truncated = True + break + result.append(line) + total_bytes += line_bytes + + assert truncated + assert total_bytes <= MAX_BYTES + assert len(result) < len(file_lines) + + def test_read_nonexistent_file_suggestions(self, temp_workspace): + """Test file not found with similar file suggestions.""" + # Create similar files + create_test_file(temp_workspace, 'test_file.py', 'content') + create_test_file(temp_workspace, 'test_file.txt', 'content') + create_test_file(temp_workspace, 'test_utils.py', 'content') + + # Simulate file not found logic + missing_file = 'test_file.js' + directory = temp_workspace + basename = os.path.splitext(missing_file)[0] # 'test_file' + + entries = os.listdir(directory) + suggestions = [ + os.path.join(directory, entry) + for entry in entries + if basename.lower() in entry.lower() or entry.lower() in basename.lower() + ][:3] + + assert len(suggestions) >= 2 + assert any('test_file.py' in s for s in suggestions) + assert any('test_file.txt' in s for s in suggestions) + + def test_read_binary_detection_by_extension(self, temp_workspace): + """Test binary file detection by extension.""" + BINARY_EXTENSIONS = { + '.zip', '.tar', '.gz', '.exe', '.dll', '.so', '.class', '.jar', + '.war', '.7z', '.doc', '.docx', '.xls', '.xlsx', 
'.ppt', '.pptx', + '.bin', '.dat', '.obj', '.o', '.a', '.lib', '.wasm', '.pyc', '.pyo' + } + + # Test detection + test_paths = [ + ('file.zip', True), + ('file.py', False), + ('file.pyc', True), + ('file.txt', False), + ('archive.tar.gz', True), # .gz extension is binary + ('data.bin', True), + ] + + for path, expected_binary in test_paths: + ext = os.path.splitext(path)[1].lower() + is_binary = ext in BINARY_EXTENSIONS + assert is_binary == expected_binary, f"Failed for {path}" + + def test_read_binary_detection_by_content(self, temp_workspace): + """Test binary file detection by content analysis.""" + # Create a binary-like file + binary_content = b'\x00\x01\x02\x03\x04\x05\x06\x07' + binary_path = os.path.join(temp_workspace, 'binary.dat') + with open(binary_path, 'wb') as f: + f.write(binary_content) + + # Create a text file + text_content = 'Hello, world!\nThis is text.' + text_path = os.path.join(temp_workspace, 'text.txt') + with open(text_path, 'w') as f: + f.write(text_content) + + # Binary detection logic + def is_binary_file(filepath): + try: + with open(filepath, 'rb') as f: + chunk = f.read(4096) + if b'\x00' in chunk: + return True + if chunk: + non_printable = sum(1 for b in chunk if b < 9 or (b > 13 and b < 32)) + if non_printable / len(chunk) > 0.3: + return True + return False + except Exception: + return False + + assert is_binary_file(binary_path) == True + assert is_binary_file(text_path) == False + + +# ============================================================================== +# OpenCodeWrite Handler Tests - Direct Implementation +# ============================================================================== + + +class TestOpenCodeWriteHandlerImpl: + """Tests for the actual opencode_write handler implementation logic.""" + + def test_write_creates_file(self, temp_workspace): + """Test that write creates a new file.""" + filepath = os.path.join(temp_workspace, 'new_file.py') + content = 'print("Hello, World!")' + + # Write logic + with 
open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + assert os.path.exists(filepath) + with open(filepath, 'r') as f: + assert f.read() == content + + def test_write_creates_parent_directories(self, temp_workspace): + """Test that write creates parent directories if needed.""" + filepath = os.path.join(temp_workspace, 'a', 'b', 'c', 'deep_file.txt') + content = 'nested content' + + # Write logic with directory creation + directory = os.path.dirname(filepath) + os.makedirs(directory, exist_ok=True) + + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + assert os.path.exists(filepath) + with open(filepath, 'r') as f: + assert f.read() == content + + def test_write_overwrites_existing(self, temp_workspace): + """Test that write overwrites existing file.""" + filepath = os.path.join(temp_workspace, 'existing.txt') + + # Create initial file + with open(filepath, 'w') as f: + f.write('original content') + + # Overwrite + new_content = 'new content' + with open(filepath, 'w', encoding='utf-8') as f: + f.write(new_content) + + with open(filepath, 'r') as f: + assert f.read() == new_content + + def test_write_empty_content(self, temp_workspace): + """Test writing empty content creates empty file.""" + filepath = os.path.join(temp_workspace, 'empty.txt') + + with open(filepath, 'w', encoding='utf-8') as f: + f.write('') + + assert os.path.exists(filepath) + assert os.path.getsize(filepath) == 0 + + def test_write_preserves_unicode(self, temp_workspace): + """Test that Unicode content is preserved.""" + filepath = os.path.join(temp_workspace, 'unicode.txt') + content = '你好世界\nこんにちは\n🎉 emoji test' + + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + with open(filepath, 'r', encoding='utf-8') as f: + assert f.read() == content + + def test_write_multiline_content(self, temp_workspace): + """Test that multiline content preserves line breaks.""" + filepath = os.path.join(temp_workspace, 'multiline.py') + content = 'def 
hello():\n print("Hello")\n\ndef goodbye():\n print("Bye")\n' + + with open(filepath, 'w', encoding='utf-8') as f: + f.write(content) + + with open(filepath, 'r') as f: + lines = f.readlines() + + # Content has 5 lines (trailing newline doesn't create extra line in readlines) + assert len(lines) == 5 + assert 'def hello():' in lines[0] + + def test_linter_command_selection(self): + """Test that correct linter is selected based on extension.""" + def get_linter_commands(filepath): + ext = os.path.splitext(filepath)[1].lower() + + if ext == '.py': + return [ + ['flake8', '--max-line-length=120', filepath], + ['pylint', '--errors-only', filepath], + ['python3', '-m', 'py_compile', filepath], + ] + elif ext in ('.js', '.jsx', '.ts', '.tsx'): + return [['eslint', '--format=compact', filepath]] + elif ext == '.go': + return [['go', 'vet', filepath]] + elif ext == '.rs': + return [['cargo', 'check', '--message-format=short']] + return [] + + assert len(get_linter_commands('/test.py')) == 3 + assert get_linter_commands('/test.py')[0][0] == 'flake8' + + assert len(get_linter_commands('/test.js')) == 1 + assert get_linter_commands('/test.js')[0][0] == 'eslint' + + assert get_linter_commands('/test.tsx')[0][0] == 'eslint' + assert get_linter_commands('/test.go')[0][0] == 'go' + + +# ============================================================================== +# Glob Handler Tests - Direct Implementation +# ============================================================================== + + +class TestGlobHandlerImpl: + """Tests for the actual glob handler implementation logic.""" + + def test_glob_finds_python_files(self, temp_workspace): + """Test glob finds Python files.""" + create_test_structure(temp_workspace) + + # Use find command (fallback when rg not available) + result = subprocess.run( + ['find', temp_workspace, '-type', 'f', '-name', '*.py'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + 
assert len(files) >= 3 # main.py, utils.py, core.py, helpers.py, test_main.py + assert any('main.py' in f for f in files) + assert any('utils.py' in f for f in files) + + def test_glob_finds_json_files(self, temp_workspace): + """Test glob finds specific extension.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['find', temp_workspace, '-type', 'f', '-name', '*.json'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + assert len(files) == 1 + assert 'config.json' in files[0] + + def test_glob_in_subdirectory(self, temp_workspace): + """Test glob searches in specific directory.""" + create_test_structure(temp_workspace) + + src_dir = os.path.join(temp_workspace, 'src') + result = subprocess.run( + ['find', src_dir, '-type', 'f', '-name', '*.py'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + # Should find core.py and helpers.py + assert len(files) >= 1 + assert any('core.py' in f for f in files) + + def test_glob_no_matches(self, temp_workspace): + """Test glob returns empty for no matches.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['find', temp_workspace, '-type', 'f', '-name', '*.nonexistent'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + assert len(files) == 0 + + def test_glob_result_sorting_by_mtime(self, temp_workspace): + """Test that results can be sorted by modification time.""" + import time + + # Create files with different mtimes + file1 = create_test_file(temp_workspace, 'old.py', 'old content') + time.sleep(0.1) + file2 = create_test_file(temp_workspace, 'new.py', 'new content') + + # Get files with mtime + result = subprocess.run( + ['find', temp_workspace, '-type', 'f', '-name', '*.py', '-printf', '%T@ %p\n'], + capture_output=True, text=True + ) + + lines = [l for l in 
result.stdout.strip().split('\n') if l.strip()] + # Sort by mtime (first field) descending + sorted_lines = sorted(lines, key=lambda x: float(x.split()[0]), reverse=True) + + if sorted_lines: + newest = sorted_lines[0].split()[1] + assert 'new.py' in newest + + +# ============================================================================== +# Grep Handler Tests - Direct Implementation +# ============================================================================== + + +class TestGrepHandlerImpl: + """Tests for the actual grep handler implementation logic.""" + + def test_grep_finds_pattern(self, temp_workspace): + """Test grep finds pattern in files.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['grep', '-rn', 'def', temp_workspace], + capture_output=True, text=True + ) + + lines = [l for l in result.stdout.strip().split('\n') if l.strip()] + + assert len(lines) >= 3 # Multiple function definitions + assert any('def main' in l for l in lines) + assert any('def helper' in l for l in lines) + + def test_grep_with_line_numbers(self, temp_workspace): + """Test grep output includes line numbers.""" + filepath = create_test_file(temp_workspace, 'test.py', 'line1\nTARGET\nline3') + + result = subprocess.run( + ['grep', '-n', 'TARGET', filepath], + capture_output=True, text=True + ) + + output = result.stdout.strip() + assert '2:TARGET' in output # Line 2 + + def test_grep_with_file_filter(self, temp_workspace): + """Test grep with file type filter using find.""" + create_test_structure(temp_workspace) + + # Find Python files and grep for 'import' + result = subprocess.run( + f'find {temp_workspace} -type f -name "*.py" -exec grep -Hn "import" {{}} \\;', + shell=True, capture_output=True, text=True + ) + + lines = [l for l in result.stdout.strip().split('\n') if l.strip()] + + # Should find imports in Python files + assert any('import' in l for l in lines) + # All results should be from .py files + for line in lines: + if ':' in line: + 
filepath = line.split(':')[0] + assert filepath.endswith('.py') + + def test_grep_no_matches(self, temp_workspace): + """Test grep returns empty for no matches.""" + create_test_file(temp_workspace, 'test.txt', 'hello world') + + result = subprocess.run( + ['grep', '-r', 'xyznonexistent123', temp_workspace], + capture_output=True, text=True + ) + + assert result.stdout.strip() == '' + + def test_grep_regex_pattern(self, temp_workspace): + """Test grep with regex pattern.""" + create_test_structure(temp_workspace) + + # Find function definitions with regex + result = subprocess.run( + ['grep', '-rE', r'def \w+\(', temp_workspace], + capture_output=True, text=True + ) + + lines = [l for l in result.stdout.strip().split('\n') if l.strip()] + + assert len(lines) >= 3 + assert any('def main(' in l for l in lines) + + def test_grep_case_sensitive(self, temp_workspace): + """Test grep is case-sensitive by default.""" + filepath = create_test_file(temp_workspace, 'case.txt', 'Hello\nhello\nHELLO') + + result = subprocess.run( + ['grep', 'hello', filepath], + capture_output=True, text=True + ) + + lines = result.stdout.strip().split('\n') + assert len(lines) == 1 + assert lines[0] == 'hello' + + +# ============================================================================== +# ListDir Handler Tests - Direct Implementation +# ============================================================================== + + +class TestListDirHandlerImpl: + """Tests for the actual list_dir handler implementation logic.""" + + def test_list_dir_finds_files(self, temp_workspace): + """Test list_dir finds files and directories.""" + create_test_structure(temp_workspace) + + # Using find to list files + result = subprocess.run( + ['find', temp_workspace, '-maxdepth', '3', '-type', 'f'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + assert len(files) >= 5 + assert any('main.py' in f for f in files) + assert 
any('config.json' in f for f in files) + + def test_list_dir_tree_structure(self, temp_workspace): + """Test building tree structure from file list.""" + files = [ + 'src/main.py', + 'src/utils.py', + 'src/lib/core.py', + 'tests/test_main.py', + ] + + # Build tree structure (logic from handler) + dirs = set() + files_by_dir = {} + + for f in files: + d = os.path.dirname(f) or '.' + parts = d.split(os.sep) if d != '.' else [] + + for i in range(len(parts) + 1): + dir_p = os.sep.join(parts[:i]) if i > 0 else '.' + dirs.add(dir_p) + + if d not in files_by_dir: + files_by_dir[d] = [] + files_by_dir[d].append(os.path.basename(f)) + + assert '.' in dirs + assert 'src' in dirs + assert 'src/lib' in dirs + assert 'tests' in dirs + + assert 'main.py' in files_by_dir['src'] + assert 'core.py' in files_by_dir['src/lib'] + + def test_list_dir_ignore_patterns(self, temp_workspace): + """Test that ignore patterns filter correctly.""" + # Create structure with ignorable directories + create_test_file(temp_workspace, 'main.py', 'content') + create_test_file(temp_workspace, 'node_modules/package/index.js', 'content') + create_test_file(temp_workspace, '__pycache__/module.pyc', 'content') + create_test_file(temp_workspace, '.git/config', 'content') + + # Get all files + result = subprocess.run( + ['find', temp_workspace, '-type', 'f'], + capture_output=True, text=True + ) + + all_files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + # Filter with ignore patterns + ignores = ['node_modules', '__pycache__', '.git'] + filtered = [f for f in all_files if not any(ig in f for ig in ignores)] + + assert any('main.py' in f for f in filtered) + assert not any('node_modules' in f for f in filtered) + assert not any('__pycache__' in f for f in filtered) + assert not any('.git' in f for f in filtered) + + def test_list_dir_empty_directory(self, temp_workspace): + """Test list_dir on empty directory.""" + empty_dir = os.path.join(temp_workspace, 'empty') + 
os.makedirs(empty_dir) + + result = subprocess.run( + ['find', empty_dir, '-type', 'f'], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + assert len(files) == 0 + + def test_list_dir_default_ignores(self): + """Test that default ignore patterns are correct.""" + DEFAULT_IGNORES = [ + 'node_modules', + '__pycache__', + '.git', + 'dist', + 'build', + 'target', + 'vendor', + '.venv', + 'venv', + '.cache', + ] + + # Test that common patterns are included + assert 'node_modules' in DEFAULT_IGNORES + assert '__pycache__' in DEFAULT_IGNORES + assert '.git' in DEFAULT_IGNORES + assert '.venv' in DEFAULT_IGNORES + + +# ============================================================================== +# Integration-style Tests - Full Handler Flow +# ============================================================================== + + +class TestFullHandlerFlow: + """Tests that simulate the full handler flow.""" + + def test_read_then_write_then_read(self, temp_workspace): + """Test full read → write → read workflow.""" + filepath = os.path.join(temp_workspace, 'workflow.py') + + # Write initial content + initial_content = 'def hello():\n print("Hello")\n' + with open(filepath, 'w') as f: + f.write(initial_content) + + # Read it back + with open(filepath, 'r') as f: + lines = f.read().split('\n') + formatted = [f"{str(i+1).zfill(5)}| {line}" for i, line in enumerate(lines)] + + assert '00001| def hello():' in formatted + + # Write new content + new_content = 'def hello():\n print("Hello, World!")\n' + with open(filepath, 'w') as f: + f.write(new_content) + + # Read again + with open(filepath, 'r') as f: + final = f.read() + + assert 'Hello, World!' 
in final + + def test_glob_then_read_files(self, temp_workspace): + """Test glob to find files, then read them.""" + create_test_structure(temp_workspace) + + # Glob for Python files + result = subprocess.run( + ['find', temp_workspace, '-type', 'f', '-name', '*.py'], + capture_output=True, text=True + ) + + py_files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + # Read each file + all_content = [] + for filepath in py_files: + with open(filepath, 'r') as f: + content = f.read() + all_content.append(content) + + # Verify we found function definitions + combined = '\n'.join(all_content) + assert 'def main' in combined + assert 'def helper' in combined + + def test_grep_then_read_matching_file(self, temp_workspace): + """Test grep to find pattern, then read the file.""" + create_test_structure(temp_workspace) + + # Grep for 'class' keyword + result = subprocess.run( + ['grep', '-rl', 'class', temp_workspace], + capture_output=True, text=True + ) + + files_with_class = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + assert len(files_with_class) >= 1 + + # Read the file with class + for filepath in files_with_class: + with open(filepath, 'r') as f: + content = f.read() + assert 'class' in content + + def test_write_python_and_check_syntax(self, temp_workspace): + """Test writing Python file and checking syntax.""" + filepath = os.path.join(temp_workspace, 'syntax_test.py') + + # Write valid Python + valid_content = 'def test():\n return 42\n\nprint(test())\n' + with open(filepath, 'w') as f: + f.write(valid_content) + + # Check syntax with py_compile + result = subprocess.run( + ['python3', '-m', 'py_compile', filepath], + capture_output=True, text=True + ) + + assert result.returncode == 0 + + def test_write_invalid_python_and_detect_error(self, temp_workspace): + """Test writing invalid Python and detecting syntax error.""" + filepath = os.path.join(temp_workspace, 'invalid_syntax.py') + + # Write invalid Python 
(missing closing paren) + invalid_content = 'def test(\n return 42\n' + with open(filepath, 'w') as f: + f.write(invalid_content) + + # Check syntax with py_compile + result = subprocess.run( + ['python3', '-m', 'py_compile', filepath], + capture_output=True, text=True + ) + + # Should have error + assert result.returncode != 0 or result.stderr + + +# ============================================================================== +# Ripgrep Integration Tests (if available) +# ============================================================================== + + +class TestRipgrepIntegration: + """Tests using ripgrep if available.""" + + @pytest.fixture(autouse=True) + def check_ripgrep(self): + """Check if ripgrep is available.""" + result = subprocess.run(['which', 'rg'], capture_output=True) + if result.returncode != 0: + pytest.skip("ripgrep (rg) not available") + + def test_rg_glob_files(self, temp_workspace): + """Test ripgrep for globbing files.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['rg', '--files', '-g', '*.py', temp_workspace], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + assert len(files) >= 3 + assert any('main.py' in f for f in files) + + def test_rg_grep_pattern(self, temp_workspace): + """Test ripgrep for content search.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['rg', '-n', 'def', temp_workspace], + capture_output=True, text=True + ) + + lines = [l for l in result.stdout.strip().split('\n') if l.strip()] + + assert len(lines) >= 3 + assert any('def main' in l for l in lines) + + def test_rg_with_file_type(self, temp_workspace): + """Test ripgrep with file type filter.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['rg', '-n', '-g', '*.py', 'import', temp_workspace], + capture_output=True, text=True + ) + + lines = [l for l in result.stdout.strip().split('\n') if l.strip()] + + # All 
results should be from Python files + for line in lines: + if ':' in line: + filepath = line.split(':')[0] + assert filepath.endswith('.py') + + def test_rg_sorted_by_mtime(self, temp_workspace): + """Test ripgrep sorts by modification time.""" + create_test_structure(temp_workspace) + + result = subprocess.run( + ['rg', '--files', '--sortr', 'modified', temp_workspace], + capture_output=True, text=True + ) + + files = [f.strip() for f in result.stdout.strip().split('\n') if f.strip()] + + # Should have files (sorted by mtime) + assert len(files) > 0 + diff --git a/tests/unit/agenthub/test_opencode_tools.py b/tests/unit/agenthub/test_opencode_tools.py new file mode 100644 index 000000000000..fc87692572b6 --- /dev/null +++ b/tests/unit/agenthub/test_opencode_tools.py @@ -0,0 +1,848 @@ +"""Unit tests for OpenCode-style tools: Read, Write, Edit, Glob, Grep, ListDir. + +These tests cover: +1. Tool definitions (schema validation) +2. Function calling (response_to_actions conversion) +3. Action class creation and validation +""" + +import json +from unittest.mock import patch + +import pytest +from litellm import ModelResponse + +from openhands.agenthub.codeact_agent.function_calling import response_to_actions +from openhands.agenthub.codeact_agent.tools import ( + EditTool, + GlobTool, + GrepTool, + ListDirTool, + ReadTool, + WriteTool, +) +from openhands.core.exceptions import FunctionCallValidationError +from openhands.core.schema import ActionType +from openhands.events.action import ( + FileEditAction, + GlobAction, + GrepAction, + ListDirAction, + OpenCodeReadAction, + OpenCodeWriteAction, +) +from openhands.events.event import FileEditSource +from openhands.llm.tool_names import ( + EDIT_TOOL_NAME, + GLOB_TOOL_NAME, + GREP_TOOL_NAME, + LIST_DIR_TOOL_NAME, + READ_TOOL_NAME, + WRITE_TOOL_NAME, +) + + +# ============================================================================== +# Helper Functions +# 
============================================================================== + + +def create_mock_response(function_name: str, arguments: dict) -> ModelResponse: + """Helper function to create a mock response with a tool call.""" + return ModelResponse( + id='mock-id', + choices=[ + { + 'message': { + 'tool_calls': [ + { + 'function': { + 'name': function_name, + 'arguments': json.dumps(arguments), + }, + 'id': 'mock-tool-call-id', + 'type': 'function', + } + ], + 'content': None, + 'role': 'assistant', + }, + 'index': 0, + 'finish_reason': 'tool_calls', + } + ], + ) + + +def create_mock_response_with_thought( + function_name: str, arguments: dict, thought: str +) -> ModelResponse: + """Helper function to create a mock response with thought content.""" + return ModelResponse( + id='mock-id', + choices=[ + { + 'message': { + 'tool_calls': [ + { + 'function': { + 'name': function_name, + 'arguments': json.dumps(arguments), + }, + 'id': 'mock-tool-call-id', + 'type': 'function', + } + ], + 'content': thought, + 'role': 'assistant', + }, + 'index': 0, + 'finish_reason': 'tool_calls', + } + ], + ) + + +# ============================================================================== +# Tool Definition Tests +# ============================================================================== + + +class TestToolDefinitions: + """Tests for tool schema definitions.""" + + def test_read_tool_schema(self): + """Test ReadTool has correct schema structure.""" + assert ReadTool['type'] == 'function' + func = ReadTool['function'] + assert func['name'] == READ_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'file_path' in params['properties'] + assert 'offset' in params['properties'] + assert 'limit' in params['properties'] + assert params['required'] == ['file_path'] + + def test_write_tool_schema(self): + """Test WriteTool has correct schema structure.""" + assert WriteTool['type'] == 'function' + func = WriteTool['function'] + assert func['name'] == 
WRITE_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'file_path' in params['properties'] + assert 'content' in params['properties'] + assert set(params['required']) == {'file_path', 'content'} + + def test_edit_tool_schema(self): + """Test EditTool has correct schema structure.""" + assert EditTool['type'] == 'function' + func = EditTool['function'] + assert func['name'] == EDIT_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'file_path' in params['properties'] + assert 'old_string' in params['properties'] + assert 'new_string' in params['properties'] + assert set(params['required']) == {'file_path', 'old_string', 'new_string'} + + def test_glob_tool_schema(self): + """Test GlobTool has correct schema structure.""" + assert GlobTool['type'] == 'function' + func = GlobTool['function'] + assert func['name'] == GLOB_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'pattern' in params['properties'] + assert 'path' in params['properties'] + assert params['required'] == ['pattern'] + + def test_grep_tool_schema(self): + """Test GrepTool has correct schema structure.""" + assert GrepTool['type'] == 'function' + func = GrepTool['function'] + assert func['name'] == GREP_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'pattern' in params['properties'] + assert 'path' in params['properties'] + assert 'include' in params['properties'] + assert params['required'] == ['pattern'] + + def test_list_dir_tool_schema(self): + """Test ListDirTool has correct schema structure.""" + assert ListDirTool['type'] == 'function' + func = ListDirTool['function'] + assert func['name'] == LIST_DIR_TOOL_NAME + assert 'parameters' in func + params = func['parameters'] + assert 'path' in params['properties'] + assert 'ignore' in params['properties'] + # path is optional, so required should be empty or not include path + assert 'required' not in params or 'path' not in 
params.get('required', []) + + +# ============================================================================== +# ReadTool Function Calling Tests +# ============================================================================== + + +class TestReadToolFunctionCalling: + """Tests for ReadTool function calling.""" + + def test_read_tool_valid_basic(self): + """Test ReadTool with just file_path.""" + response = create_mock_response(READ_TOOL_NAME, {'file_path': '/path/to/file.py'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeReadAction) + assert actions[0].path == '/path/to/file.py' + assert actions[0].offset == 0 # default + assert actions[0].limit == 2000 # default + assert actions[0].action == ActionType.OPENCODE_READ + + def test_read_tool_with_offset(self): + """Test ReadTool with offset parameter.""" + response = create_mock_response( + READ_TOOL_NAME, {'file_path': '/path/to/file.py', 'offset': 100} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeReadAction) + assert actions[0].offset == 100 + assert actions[0].limit == 2000 + + def test_read_tool_with_limit(self): + """Test ReadTool with limit parameter.""" + response = create_mock_response( + READ_TOOL_NAME, {'file_path': '/path/to/file.py', 'limit': 500} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeReadAction) + assert actions[0].offset == 0 + assert actions[0].limit == 500 + + def test_read_tool_with_all_params(self): + """Test ReadTool with all parameters.""" + response = create_mock_response( + READ_TOOL_NAME, {'file_path': '/path/to/file.py', 'offset': 50, 'limit': 100} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeReadAction) + assert actions[0].path == '/path/to/file.py' + assert actions[0].offset == 50 + assert actions[0].limit == 100 + + 
def test_read_tool_missing_file_path(self): + """Test ReadTool raises error when file_path is missing.""" + response = create_mock_response(READ_TOOL_NAME, {'offset': 10}) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "file_path"' in str(exc_info.value) + + def test_read_tool_with_thought(self): + """Test ReadTool preserves thought content.""" + response = create_mock_response_with_thought( + READ_TOOL_NAME, {'file_path': '/test.py'}, 'Let me read this file' + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].thought == 'Let me read this file' + + +# ============================================================================== +# WriteTool Function Calling Tests +# ============================================================================== + + +class TestWriteToolFunctionCalling: + """Tests for WriteTool function calling.""" + + def test_write_tool_valid(self): + """Test WriteTool with valid arguments.""" + response = create_mock_response( + WRITE_TOOL_NAME, + {'file_path': '/path/to/file.py', 'content': 'print("hello world")'}, + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeWriteAction) + assert actions[0].path == '/path/to/file.py' + assert actions[0].content == 'print("hello world")' + assert actions[0].action == ActionType.OPENCODE_WRITE + + def test_write_tool_multiline_content(self): + """Test WriteTool with multiline content.""" + content = 'def hello():\n print("hello")\n\nif __name__ == "__main__":\n hello()' + response = create_mock_response( + WRITE_TOOL_NAME, {'file_path': '/path/to/file.py', 'content': content} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeWriteAction) + assert actions[0].content == content + + def test_write_tool_empty_content(self): + """Test WriteTool with empty content 
(valid case for creating empty file).""" + response = create_mock_response( + WRITE_TOOL_NAME, {'file_path': '/path/to/file.py', 'content': ''} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], OpenCodeWriteAction) + assert actions[0].content == '' + + def test_write_tool_missing_file_path(self): + """Test WriteTool raises error when file_path is missing.""" + response = create_mock_response(WRITE_TOOL_NAME, {'content': 'some content'}) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "file_path"' in str(exc_info.value) + + def test_write_tool_missing_content(self): + """Test WriteTool raises error when content is missing.""" + response = create_mock_response(WRITE_TOOL_NAME, {'file_path': '/path/to/file.py'}) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "content"' in str(exc_info.value) + + def test_write_tool_special_characters(self): + """Test WriteTool handles special characters in content.""" + content = 'print("Special: \\n\\t\\"escaped\\" \'quotes\'")' + response = create_mock_response( + WRITE_TOOL_NAME, {'file_path': '/path/to/file.py', 'content': content} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].content == content + + +# ============================================================================== +# EditTool Function Calling Tests +# ============================================================================== + + +class TestEditToolFunctionCalling: + """Tests for EditTool function calling.""" + + def test_edit_tool_valid(self): + """Test EditTool with valid arguments.""" + response = create_mock_response( + EDIT_TOOL_NAME, + { + 'file_path': '/path/to/file.py', + 'old_string': 'def foo():', + 'new_string': 'def bar():', + }, + ) + actions = response_to_actions(response) + assert 
len(actions) == 1 + assert isinstance(actions[0], FileEditAction) + assert actions[0].path == '/path/to/file.py' + assert actions[0].old_str == 'def foo():' + assert actions[0].new_str == 'def bar():' + assert actions[0].command == 'str_replace' + assert actions[0].impl_source == FileEditSource.OH_ACI + + def test_edit_tool_multiline_replacement(self): + """Test EditTool with multiline strings.""" + old = 'def foo():\n pass' + new = 'def foo():\n return 42' + response = create_mock_response( + EDIT_TOOL_NAME, + {'file_path': '/path/to/file.py', 'old_string': old, 'new_string': new}, + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].old_str == old + assert actions[0].new_str == new + + def test_edit_tool_empty_new_string(self): + """Test EditTool with empty new_string (deletion).""" + response = create_mock_response( + EDIT_TOOL_NAME, + { + 'file_path': '/path/to/file.py', + 'old_string': 'remove this', + 'new_string': '', + }, + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].new_str == '' + + def test_edit_tool_missing_file_path(self): + """Test EditTool raises error when file_path is missing.""" + response = create_mock_response( + EDIT_TOOL_NAME, {'old_string': 'old', 'new_string': 'new'} + ) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "file_path"' in str(exc_info.value) + + def test_edit_tool_missing_old_string(self): + """Test EditTool raises error when old_string is missing.""" + response = create_mock_response( + EDIT_TOOL_NAME, {'file_path': '/path/to/file.py', 'new_string': 'new'} + ) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "old_string"' in str(exc_info.value) + + def test_edit_tool_missing_new_string(self): + """Test EditTool raises error when new_string is missing.""" + response = 
create_mock_response( + EDIT_TOOL_NAME, {'file_path': '/path/to/file.py', 'old_string': 'old'} + ) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "new_string"' in str(exc_info.value) + + +# ============================================================================== +# GlobTool Function Calling Tests +# ============================================================================== + + +class TestGlobToolFunctionCalling: + """Tests for GlobTool function calling.""" + + def test_glob_tool_valid_basic(self): + """Test GlobTool with just pattern.""" + response = create_mock_response(GLOB_TOOL_NAME, {'pattern': '*.py'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], GlobAction) + assert actions[0].pattern == '*.py' + assert actions[0].path == '.' # default + assert actions[0].action == ActionType.GLOB + + def test_glob_tool_with_path(self): + """Test GlobTool with path parameter.""" + response = create_mock_response( + GLOB_TOOL_NAME, {'pattern': '*.ts', 'path': '/project/src'} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], GlobAction) + assert actions[0].pattern == '*.ts' + assert actions[0].path == '/project/src' + + def test_glob_tool_recursive_pattern(self): + """Test GlobTool with recursive pattern.""" + response = create_mock_response(GLOB_TOOL_NAME, {'pattern': '**/*.test.js'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].pattern == '**/*.test.js' + + def test_glob_tool_missing_pattern(self): + """Test GlobTool raises error when pattern is missing.""" + response = create_mock_response(GLOB_TOOL_NAME, {'path': '/some/path'}) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "pattern"' in str(exc_info.value) + + def 
test_glob_tool_complex_patterns(self): + """Test GlobTool with various glob patterns.""" + patterns = [ + '*.{js,ts}', + '[!_]*.py', + '**/*_test.go', + '*.py[cod]', + ] + for pattern in patterns: + response = create_mock_response(GLOB_TOOL_NAME, {'pattern': pattern}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].pattern == pattern + + +# ============================================================================== +# GrepTool Function Calling Tests +# ============================================================================== + + +class TestGrepToolFunctionCalling: + """Tests for GrepTool function calling.""" + + def test_grep_tool_valid_basic(self): + """Test GrepTool with just pattern.""" + response = create_mock_response(GREP_TOOL_NAME, {'pattern': 'TODO'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], GrepAction) + assert actions[0].pattern == 'TODO' + assert actions[0].path == '.' # default + assert actions[0].include == '' # default + assert actions[0].action == ActionType.GREP + + def test_grep_tool_with_path(self): + """Test GrepTool with path parameter.""" + response = create_mock_response( + GREP_TOOL_NAME, {'pattern': 'import', 'path': '/project/src'} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], GrepAction) + assert actions[0].path == '/project/src' + + def test_grep_tool_with_include(self): + """Test GrepTool with include filter.""" + response = create_mock_response( + GREP_TOOL_NAME, {'pattern': 'class.*Handler', 'include': '*.py'} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], GrepAction) + assert actions[0].include == '*.py' + + def test_grep_tool_with_all_params(self): + """Test GrepTool with all parameters.""" + response = create_mock_response( + GREP_TOOL_NAME, + {'pattern': 'function', 'path': '/app', 'include': '*.{js,ts}'}, + ) 
+ actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].pattern == 'function' + assert actions[0].path == '/app' + assert actions[0].include == '*.{js,ts}' + + def test_grep_tool_missing_pattern(self): + """Test GrepTool raises error when pattern is missing.""" + response = create_mock_response(GREP_TOOL_NAME, {'path': '/some/path'}) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Missing required argument "pattern"' in str(exc_info.value) + + def test_grep_tool_regex_pattern(self): + """Test GrepTool with regex patterns.""" + patterns = [ + r'def \w+\(', + r'^import', + r'[A-Z][a-z]+Error', + r'\d{4}-\d{2}-\d{2}', + ] + for pattern in patterns: + response = create_mock_response(GREP_TOOL_NAME, {'pattern': pattern}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].pattern == pattern + + +# ============================================================================== +# ListDirTool Function Calling Tests +# ============================================================================== + + +class TestListDirToolFunctionCalling: + """Tests for ListDirTool function calling.""" + + def test_list_dir_tool_default(self): + """Test ListDirTool with defaults.""" + response = create_mock_response(LIST_DIR_TOOL_NAME, {}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], ListDirAction) + assert actions[0].path == '.' 
# default + assert actions[0].ignore == [] # default + assert actions[0].action == ActionType.LIST_DIR + + def test_list_dir_tool_with_path(self): + """Test ListDirTool with path parameter.""" + response = create_mock_response(LIST_DIR_TOOL_NAME, {'path': '/project/src'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], ListDirAction) + assert actions[0].path == '/project/src' + + def test_list_dir_tool_with_ignore(self): + """Test ListDirTool with ignore patterns.""" + response = create_mock_response( + LIST_DIR_TOOL_NAME, {'ignore': ['*.log', 'tmp', '*.bak']} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert isinstance(actions[0], ListDirAction) + assert actions[0].ignore == ['*.log', 'tmp', '*.bak'] + + def test_list_dir_tool_with_all_params(self): + """Test ListDirTool with all parameters.""" + response = create_mock_response( + LIST_DIR_TOOL_NAME, {'path': '/app', 'ignore': ['node_modules', 'dist']} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].path == '/app' + assert actions[0].ignore == ['node_modules', 'dist'] + + def test_list_dir_default_ignores(self): + """Test ListDirAction has correct default ignore patterns.""" + action = ListDirAction() + expected_defaults = [ + 'node_modules', + '__pycache__', + '.git', + 'dist', + 'build', + 'target', + 'vendor', + '.venv', + 'venv', + '.cache', + ] + assert action.DEFAULT_IGNORES == expected_defaults + assert action.all_ignores == expected_defaults + + def test_list_dir_combined_ignores(self): + """Test ListDirAction combines default and custom ignores.""" + action = ListDirAction(ignore=['custom', '*.tmp']) + assert 'node_modules' in action.all_ignores + assert 'custom' in action.all_ignores + assert '*.tmp' in action.all_ignores + + +# ============================================================================== +# Action Class Tests +# 
============================================================================== + + +class TestOpenCodeReadAction: + """Tests for OpenCodeReadAction class.""" + + def test_action_creation(self): + """Test basic action creation.""" + action = OpenCodeReadAction(path='/test.py') + assert action.path == '/test.py' + assert action.offset == 0 + assert action.limit == 2000 + assert action.action == ActionType.OPENCODE_READ + assert action.runnable is True + + def test_action_with_params(self): + """Test action creation with parameters.""" + action = OpenCodeReadAction(path='/test.py', offset=100, limit=500) + assert action.offset == 100 + assert action.limit == 500 + + def test_action_message(self): + """Test action message property.""" + action = OpenCodeReadAction(path='/test.py') + assert action.message == 'Reading file: /test.py' + + def test_action_message_with_offset(self): + """Test action message with offset.""" + action = OpenCodeReadAction(path='/test.py', offset=50) + assert action.message == 'Reading file: /test.py (from line 51)' + + +class TestOpenCodeWriteAction: + """Tests for OpenCodeWriteAction class.""" + + def test_action_creation(self): + """Test basic action creation.""" + action = OpenCodeWriteAction(path='/test.py', content='print("hi")') + assert action.path == '/test.py' + assert action.content == 'print("hi")' + assert action.action == ActionType.OPENCODE_WRITE + assert action.runnable is True + + def test_action_message(self): + """Test action message property.""" + action = OpenCodeWriteAction(path='/test.py', content='') + assert action.message == 'Writing file: /test.py' + + +class TestGlobAction: + """Tests for GlobAction class.""" + + def test_action_creation(self): + """Test basic action creation.""" + action = GlobAction(pattern='*.py') + assert action.pattern == '*.py' + assert action.path == '.' 
+ assert action.action == ActionType.GLOB + assert action.runnable is True + + def test_action_with_path(self): + """Test action creation with path.""" + action = GlobAction(pattern='*.ts', path='/project') + assert action.path == '/project' + + def test_action_message(self): + """Test action message property.""" + action = GlobAction(pattern='**/*.py') + assert action.message == 'Searching for files matching: **/*.py' + + +class TestGrepAction: + """Tests for GrepAction class.""" + + def test_action_creation(self): + """Test basic action creation.""" + action = GrepAction(pattern='TODO') + assert action.pattern == 'TODO' + assert action.path == '.' + assert action.include == '' + assert action.action == ActionType.GREP + assert action.runnable is True + + def test_action_with_params(self): + """Test action creation with all params.""" + action = GrepAction(pattern='import', path='/src', include='*.py') + assert action.path == '/src' + assert action.include == '*.py' + + def test_action_message(self): + """Test action message property.""" + action = GrepAction(pattern='class.*Test') + assert action.message == 'Searching for pattern: class.*Test' + + +class TestListDirAction: + """Tests for ListDirAction class.""" + + def test_action_creation(self): + """Test basic action creation.""" + action = ListDirAction() + assert action.path == '.' 
+ assert action.ignore == [] + assert action.action == ActionType.LIST_DIR + assert action.runnable is True + + def test_action_with_params(self): + """Test action creation with parameters.""" + action = ListDirAction(path='/project', ignore=['*.log']) + assert action.path == '/project' + assert action.ignore == ['*.log'] + + def test_action_message(self): + """Test action message property.""" + action = ListDirAction(path='/project') + assert action.message == 'Listing directory: /project' + + def test_action_message_default(self): + """Test action message with default path.""" + action = ListDirAction() + # Message contains the path, which is '.' + assert '.' in action.message or 'directory' in action.message + + +# ============================================================================== +# Edge Cases and Error Handling Tests +# ============================================================================== + + +class TestEdgeCases: + """Tests for edge cases and special scenarios.""" + + def test_invalid_json_arguments(self): + """Test handling of invalid JSON in arguments.""" + response = ModelResponse( + id='mock-id', + choices=[ + { + 'message': { + 'tool_calls': [ + { + 'function': { + 'name': READ_TOOL_NAME, + 'arguments': 'not valid json', + }, + 'id': 'mock-tool-call-id', + 'type': 'function', + } + ], + 'content': None, + 'role': 'assistant', + }, + 'index': 0, + 'finish_reason': 'tool_calls', + } + ], + ) + with pytest.raises(FunctionCallValidationError) as exc_info: + response_to_actions(response) + assert 'Failed to parse tool call arguments' in str(exc_info.value) + + def test_unicode_in_paths(self): + """Test handling of Unicode characters in paths.""" + response = create_mock_response(READ_TOOL_NAME, {'file_path': '/путь/к/файлу.py'}) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].path == '/путь/к/файлу.py' + + def test_unicode_in_content(self): + """Test handling of Unicode characters in content.""" + 
content = '# 你好世界\nprint("こんにちは")' + response = create_mock_response( + WRITE_TOOL_NAME, {'file_path': '/test.py', 'content': content} + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].content == content + + def test_whitespace_handling_in_edit(self): + """Test whitespace preservation in edit operations.""" + old = ' def foo():\n pass' + new = ' def foo():\n return None' + response = create_mock_response( + EDIT_TOOL_NAME, + {'file_path': '/test.py', 'old_string': old, 'new_string': new}, + ) + actions = response_to_actions(response) + assert len(actions) == 1 + assert actions[0].old_str == old + assert actions[0].new_str == new + + def test_empty_path_glob(self): + """Test empty path defaults to current directory.""" + response = create_mock_response(GLOB_TOOL_NAME, {'pattern': '*.py', 'path': ''}) + actions = response_to_actions(response) + assert len(actions) == 1 + # Empty string should work (will be interpreted as current dir) + assert actions[0].path == '' + + def test_multiple_tool_calls(self): + """Test handling of multiple tool calls in single response.""" + response = ModelResponse( + id='mock-id', + choices=[ + { + 'message': { + 'tool_calls': [ + { + 'function': { + 'name': READ_TOOL_NAME, + 'arguments': json.dumps( + {'file_path': '/file1.py'} + ), + }, + 'id': 'call-1', + 'type': 'function', + }, + { + 'function': { + 'name': READ_TOOL_NAME, + 'arguments': json.dumps( + {'file_path': '/file2.py'} + ), + }, + 'id': 'call-2', + 'type': 'function', + }, + ], + 'content': None, + 'role': 'assistant', + }, + 'index': 0, + 'finish_reason': 'tool_calls', + } + ], + ) + actions = response_to_actions(response) + assert len(actions) == 2 + assert actions[0].path == '/file1.py' + assert actions[1].path == '/file2.py' diff --git a/tmux_test.py b/tmux_test.py new file mode 100644 index 000000000000..b0f7997a3284 --- /dev/null +++ b/tmux_test.py @@ -0,0 +1,210 @@ +import libtmux +import psutil +import threading +import 
time +import os +import sys +import uuid + + +class TmuxMemoryMonitor(threading.Thread): + def __init__(self, tmux_server, limit_mb, interval=0.5): + super().__init__(daemon=True) + self.server = tmux_server + self.limit_mb = limit_mb + self.limit_bytes = limit_mb * 1024 * 1024 + self.interval = interval + self.running = True + self.kill_triggered = False + + def _get_server_pid(self): + try: + pid_str = self.server.cmd("display-message", "-p", "#{pid}").stdout[0] + return int(pid_str) + except: + return None + + def get_tree_memory(self, parent_pid): + total_mem = 0 + try: + parent = psutil.Process(parent_pid) + # recursive=True finds children AND grandchildren + procs = [parent] + parent.children(recursive=True) + for p in procs: + try: + total_mem += p.memory_info().rss + except (psutil.NoSuchProcess, psutil.AccessDenied): + pass + except psutil.NoSuchProcess: + return 0 + return total_mem + + def kill_inner_processes(self): + try: + for session in self.server.sessions: + for window in session.windows: + for pane in window.panes: + try: + # 1. Get Shell PID inside the pane + pane_pid = int( + pane.cmd("display-message", "-p", "#{pane_pid}").stdout[ + 0 + ] + ) + parent = psutil.Process(pane_pid) + + # 2. Get EVERY descendant (Main script + Background procs) + children = parent.children(recursive=True) + + print( + f"[GUARD] Found {len(children)} descendants in Pane {pane.id} (Shell PID: {pane_pid})" + ) + + # 3. Log and Kill them all + for child in children: + try: + # Grab info before killing + pid = child.pid + cmd = " ".join(child.cmdline()) + + print( + f"[GUARD] 🗡️ Killing PID: {pid:<6} CMD: {cmd}..." + ) # Truncate long cmds + child.kill() + except (psutil.NoSuchProcess, psutil.AccessDenied): + print(f"[GUARD] ⚠️ Process vanished before kill.") + + except (psutil.NoSuchProcess, IndexError, ValueError): + continue + except Exception as e: + print(f"[GUARD] Error: {e}") + + def run(self): + print(f"[GUARD] Monitoring started. 
Limit: {self.limit_mb}MB") + time.sleep(1) + server_pid = self._get_server_pid() + if not server_pid: + return + + while self.running: + try: + used_bytes = self.get_tree_memory(server_pid) + except: + break + + used_mb = used_bytes / (1024 * 1024) + print(f"[GUARD] Tree Usage: {int(used_mb)}MB / {self.limit_mb}MB") + + if used_bytes > self.limit_bytes: + print( + f"\n[GUARD] 🚨 LIMIT EXCEEDED ({int(used_mb)}MB)! Killing inner processes..." + ) + self.kill_inner_processes() + self.kill_triggered = True + time.sleep(5) + + time.sleep(self.interval) + + +# --- 2. LEAKER (With Background Procs) --- +def create_leaker_script(filename="leaker_verbose.py"): + script_content = """ +import multiprocessing +import subprocess +import time +import os +import sys + +def consume_memory_worker(mb): + try: + data = bytearray(mb * 1024 * 1024) + while True: time.sleep(1) + except: pass + +if __name__ == "__main__": + processes = [] + print(f"--- Main Process PID: {os.getpid()} ---") + + # 1. Spawn a sneaky BACKGROUND process + print("--- Spawning Background Process (100MB) ---") + bg_code = "import time; a = bytearray(100*1024*1024); time.sleep(1000)" + # We use -c to run inline python code + bg_proc = subprocess.Popen([sys.executable, "-c", bg_code]) + processes.append(bg_proc) + + # 2. Spawn 100 Standard Workers (50MB each) + print("--- Spawning 8 Standard Workers ---") + for i in range(100): + p = multiprocessing.Process(target=consume_memory_worker, args=(50,)) + p.start() + processes.append(p) + time.sleep(0.2) + + print("--- ALL STARTED. Waiting to be killed... ---") + try: + while True: time.sleep(1) + except: pass +""" + with open(filename, "w") as f: + f.write(script_content) + return os.path.abspath(filename) + + +# --- 3. TEST RUNNER --- +def run_test(): + leaker_file = create_leaker_script() + unique_socket = f"/tmp/tmux-test-{uuid.uuid4()}" + + print(f"1. 
Starting Tmux Server (Socket: {unique_socket})") + server = libtmux.Server(socket_path=unique_socket) + + session = server.new_session(session_name="verbose_test", start_directory="/tmp") + pane = session.active_pane + server_pid = int(server.cmd("display-message", "-p", "#{pid}").stdout[0]) + + print("2. Sending Leaker command...") + pane.send_keys(f"{sys.executable} {leaker_file}") + + # Limit 1000MB. Leaker allocates ~5GB (100 x 50MB workers + 100MB background), so the guard should trip + print("3. Starting Guard (Limit: 300MB)") + guard = TmuxMemoryMonitor(server, limit_mb=1000, interval=0.5) + guard.start() + + start_time = time.time() + try: + while time.time() - start_time < 30: + if guard.kill_triggered: + print("\n[TEST] Kill triggered. Verifying cleanup...") + time.sleep(3) + + if not psutil.pid_exists(server_pid): + print("❌ FAIL: Tmux Server died!") + return + + current_mem_mb = guard.get_tree_memory(server_pid) / (1024 * 1024) + print(f"[TEST] Residual Memory: {int(current_mem_mb)}MB") + + if current_mem_mb < 50: + print( + "✅ PASS: Memory dropped. All processes (including background) killed." + ) + return + else: + print("❌ FAIL: Memory still high!") + return + + time.sleep(1) + print("\n❌ FAIL: Timeout.") + + finally: + if os.path.exists(leaker_file): + os.remove(leaker_file) + guard.running = False + try: + server.kill_server() + except: + pass + + +if __name__ == "__main__": + run_test()