Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
303 changes: 303 additions & 0 deletions autoenv/claude_code_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, Dict, List, Optional

from pydantic import Field, model_validator, PrivateAttr

from base.agent.base_agent import BaseAgent


try:
    from claude_agent_sdk import (
        query,
        ClaudeAgentOptions,
        ClaudeSDKError,
        CLINotFoundError,
        ProcessError,
        CLIJSONDecodeError,
        ResultMessage,
    )
    CLAUDE_AGENT_AVAILABLE = True
except ImportError:
    CLAUDE_AGENT_AVAILABLE = False
    # Fallback types
    AssistantMessage = UserMessage = ResultMessage = Any
    TextBlock = ToolUseBlock = ToolResultBlock = Any
    ClaudeSDKError = CLINotFoundError = ProcessError = CLIJSONDecodeError = Exception
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback type assignments on lines 24-26 create type aliases that map to Any or Exception, which defeats the purpose of type hints. When CLAUDE_AGENT_AVAILABLE is False, these fallback types will allow any value without type checking, potentially hiding bugs. Consider using proper type stubs or Protocol definitions for better type safety when the SDK is not installed.

Copilot uses AI. Check for mistakes.


class ClaudeCodeAgent(BaseAgent):
"""Claude Code Agent for code generation using Claude Agent SDK."""
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The class docstring mentions "Claude Code Agent for code generation using Claude Agent SDK" but doesn't provide information about installation, requirements, or usage. While the validator provides an install command (line 74), the class-level documentation should be more comprehensive with examples of how to use the agent.

Suggested change
"""Claude Code Agent for code generation using Claude Agent SDK."""
"""Claude Code Agent for code generation using Claude Agent SDK.
This agent integrates with the external `claude_agent_sdk` package to provide
code-centric automation such as writing, editing, and executing code in a
local project directory. It is designed to be used as a `BaseAgent`
subclass within the autoenv framework.
Installation
------------
The `claude_agent_sdk` package must be installed for this agent to be fully
functional. If it is missing, `CLAUDE_AGENT_AVAILABLE` will be set to
``False`` and calls into the SDK will fail.
Install the SDK via pip:
pip install claude-agent-sdk
or add it to your project's dependencies.
Requirements
-----------
- Python environment with access to the Claude Code CLI used by
`claude_agent_sdk`.
- Valid configuration/credentials for the underlying Claude tooling, as
required by the SDK and CLI.
- (Optional) A working directory ``cwd`` that points to the root of the
project the agent should operate on.
Basic usage example
-------------------
Instantiate the agent and run it on a coding task:
.. code-block:: python
from pathlib import Path
from autoenv.claude_code_agent import ClaudeCodeAgent
agent = ClaudeCodeAgent(
cwd=Path("/path/to/your/project"),
allowed_tools=["Read", "Write", "Bash"],
max_turns=8,
)
# Example high-level call (actual API depends on BaseAgent interface):
result = agent.run("Add unit tests for the user authentication module.")
print(result)
Within larger systems, `ClaudeCodeAgent` can be composed with other agents
or orchestrators that expect a `BaseAgent`-compatible interface.
"""

Copilot uses AI. Check for mistakes.

name: str = Field(default="claude_code", description="Agent name")
description: str = Field(
default="Claude Code agent for code generation and execution",
description="Agent description"
)

# Claude Code specific settings
# NOTE: Uses `max_turns` for conversation turns with Claude Code CLI (vs BaseAgent's generic `max_steps`)
max_turns: int = Field(default=10, description="Maximum conversation turns")
cwd: Optional[Path] = Field(default=None, description="Working directory")
allowed_tools: Optional[List[str]] = Field(
default=None,
description="Allowed tools (e.g., ['Read', 'Write', 'Bash'])"
)
permission_mode: str = Field(
default="acceptEdits",
description="Permission mode: default|acceptEdits|bypassPermissions|plan"
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as CodexAgent - the permission_mode field lists four modes but doesn't explain what each mode does. This is especially important given the security implications of "bypassPermissions". Add detailed documentation for each permission mode.

Suggested change
description="Permission mode: default|acceptEdits|bypassPermissions|plan"
description=(
"Controls how the agent handles permission prompts for edits, tool use, and commands. "
"Supported values:\n"
"- 'default': Use the Claude Code CLI / SDK's default interactive behavior for requesting\n"
" permission before making changes or running tools.\n"
"- 'acceptEdits': Automatically accept and apply code edits suggested by the agent while\n"
" still requiring confirmation for other sensitive actions (e.g., shell commands).\n"
"- 'bypassPermissions': Run tools, apply edits, and execute commands without asking for\n"
" interactive confirmation. This effectively disables permission prompts and should only\n"
" be used in fully trusted, sandboxed environments due to its security implications.\n"
"- 'plan': Focus on generating and updating a plan of actions without automatically\n"
" executing potentially destructive steps; execution typically requires separate approval."
)

Copilot uses AI. Check for mistakes.
)
system_prompt_override: Optional[str] = Field(
default=None,
description="Override system prompt (only for non-interactive mode)"
)
append_system_prompt: Optional[str] = Field(
default=None,
description="Append to system prompt (only for non-interactive mode)"
)

class Config:
arbitrary_types_allowed = True

# Private attributes for internal state
_messages: List[Any] = PrivateAttr(default_factory=list)
_session_id: Optional[str] = PrivateAttr(default=None)
_total_cost_usd: float = PrivateAttr(default=0.0)
_current_prompt: Optional[str] = PrivateAttr(default=None)

@model_validator(mode="after")
def validate_claude_agent_available(self) -> "ClaudeCodeAgent":
    """Check SDK availability, then normalise and validate the settings.

    Steps, in order:
      1. Fail if the module-level ``CLAUDE_AGENT_AVAILABLE`` flag is false.
      2. Default ``cwd`` to the process working directory, or coerce the
         supplied value to ``Path``.
      3. Verify ``cwd`` via ``_validate_cwd()``.
      4. Verify ``permission_mode`` is one of the accepted modes.

    Returns:
        The validated agent instance.

    Raises:
        ImportError: The Claude Agent SDK could not be imported.
        FileNotFoundError / NotADirectoryError: Propagated from
            ``_validate_cwd()``.
        ValueError: ``permission_mode`` is not an accepted mode.
    """
    if not CLAUDE_AGENT_AVAILABLE:
        raise ImportError(
            "Claude Agent SDK is not installed. "
            "Install it with: pip install claude-agent-sdk\n"
            "Note: The Claude Code CLI is automatically bundled - no separate installation needed!"
        )

    # Fall back to the current directory; otherwise make sure we hold a Path.
    self.cwd = Path.cwd() if self.cwd is None else Path(self.cwd)
    self._validate_cwd()

    # Kept as a list so the error message renders exactly as a list repr.
    accepted_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
    if self.permission_mode in accepted_modes:
        return self

    raise ValueError(
        f"Invalid permission_mode: {self.permission_mode}. "
        f"Must be one of: {accepted_modes}"
    )

def _validate_cwd(self) -> None:
"""Validate working directory exists and is a directory."""
if not self.cwd.exists():
raise FileNotFoundError(f"Working directory does not exist: {self.cwd}")
if not self.cwd.is_dir():
raise NotADirectoryError(f"Working directory path is not a directory: {self.cwd}")

def _create_options(self) -> ClaudeAgentOptions:
    """Build the ``ClaudeAgentOptions`` for a query from the agent settings.

    ``max_turns``, ``cwd`` (as a string) and ``permission_mode`` are always
    passed. ``allowed_tools`` is passed only when it is non-empty. A
    non-empty ``system_prompt_override`` replaces the system prompt and takes
    precedence over ``append_system_prompt``; the latter is used only when no
    override is set.

    Returns:
        A ``ClaudeAgentOptions`` instance built from the current settings.
    """
    options_dict: Dict[str, Any] = {
        "max_turns": self.max_turns,
        "cwd": str(self.cwd),
        "permission_mode": self.permission_mode,
    }

    if self.allowed_tools:
        options_dict["allowed_tools"] = self.allowed_tools

    if self.system_prompt_override:
        options_dict["system_prompt"] = self.system_prompt_override
    elif self.append_system_prompt:
        # ClaudeAgentOptions has no ``append_system_prompt`` field; passing it
        # as a keyword raises TypeError. Appended text goes through the
        # ``system_prompt`` preset form instead.
        # NOTE(review): per the claude-agent-sdk docs/migration guide —
        # confirm against the SDK version pinned by this project.
        options_dict["system_prompt"] = {
            "type": "preset",
            "preset": "claude_code",
            "append": self.append_system_prompt,
        }

    return ClaudeAgentOptions(**options_dict)

def _handle_sdk_error(self, e: Exception) -> str:
    """Translate a Claude SDK exception into a user-facing error string.

    Only the SDK error types checked below are converted to strings. Any
    other exception is re-raised so programming errors stay visible.

    Args:
        e: The exception to handle.

    Returns:
        A formatted ``"Error: ..."`` message for recognised SDK errors.

    Raises:
        Exception: ``e`` itself, when it is not one of the recognised SDK
            error types.
    """
    # The specific types are tested before ClaudeSDKError (presumably the
    # SDK's base error class — confirm) so they get their dedicated message.
    if isinstance(e, CLINotFoundError):
        return f"Error: Claude Code CLI not found: {e!s}"
    if isinstance(e, ProcessError):
        # Not every ProcessError is assumed to carry an exit code.
        exit_code = getattr(e, "exit_code", "unknown")
        return f"Error: Process failed with exit code {exit_code}: {e!s}"
    if isinstance(e, CLIJSONDecodeError):
        return f"Error: Failed to parse Claude response: {e!s}"
    if isinstance(e, ClaudeSDKError):
        return f"Error: Claude SDK error: {e!s}"

    # Re-raise the passed-in exception explicitly. A bare ``raise`` here only
    # works while the caller is still inside an ``except`` block; otherwise it
    # fails with "RuntimeError: No active exception to re-raise".
    raise e
Comment on lines +180 to +206
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _handle_sdk_error method has inconsistent error handling logic. It checks for specific exception types using isinstance, but if CLAUDE_AGENT_AVAILABLE is False, all the exception types are set to the base Exception class (line 26). This means the isinstance checks will not work as intended: every exception is an instance of Exception, so the first check (CLINotFoundError) would match any error and report it as "Claude Code CLI not found", and the final else clause that re-raises unexpected exceptions would never be reached. The method should handle the case where SDK types are unavailable.

Copilot uses AI. Check for mistakes.

async def _process_query_stream(self, prompt: str, options: ClaudeAgentOptions) -> str:
    """Consume the SDK query stream and return the final result text.

    Every streamed message is appended to ``self._messages``. Messages that
    expose ``session_id`` update ``self._session_id``. For result messages
    the reported cost is added to ``self._total_cost_usd`` and a result
    string is derived from ``result`` or, failing that, from ``subtype``.

    Args:
        prompt: The prompt to send to Claude.
        options: Claude agent options for this query.

    Returns:
        The text of the last result message, a description of its error or
        completion subtype, or ``"No result received"`` when the stream
        produced no usable result.
    """
    result_text = ""
    async for message in query(prompt=prompt, options=options):
        self._messages.append(message)

        if hasattr(message, "session_id"):
            self._session_id = message.session_id

        # The Python SDK yields typed message objects; ResultMessage is
        # identified by class. The ``type == "result"`` check is kept for
        # dict-like/legacy message shapes.
        # NOTE(review): assumes ResultMessage has no ``type`` attribute, per
        # the claude-agent-sdk reference — confirm for the pinned version.
        is_result = (
            getattr(message, "type", None) == "result"
            or isinstance(message, ResultMessage)
        )
        if not is_result:
            continue

        # The cost may be present but None; adding None would raise TypeError.
        cost = getattr(message, "total_cost_usd", None)
        if cost is not None:
            self._total_cost_usd += cost

        # ``result`` may exist with value None (e.g. on error results), so
        # test the value rather than attribute presence. Otherwise the
        # subtype-based messages below are unreachable.
        result = getattr(message, "result", None)
        subtype = getattr(message, "subtype", None)
        if result is not None:
            result_text = result
        elif subtype is not None:
            if subtype == "error_max_turns":
                result_text = f"Error: Reached maximum turns ({self.max_turns})"
            elif subtype == "error_during_execution":
                result_text = "Error: Execution failed"
            else:
                result_text = f"Completed with status: {subtype}"
        else:
            result_text = "Execution completed"

    return result_text if result_text else "No result received"

async def step(self) -> str:
"""Execute a single step in the agent's workflow."""
if not self._current_prompt:
return "No prompt provided. Use run() method to execute tasks."

try:
options = self._create_options()
return await self._process_query_stream(self._current_prompt, options)
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error handling in step() (line 201-202) catches all exceptions and passes them to _handle_sdk_error, which re-raises unexpected exceptions. However, this means that validation errors like FileNotFoundError and NotADirectoryError from _validate_cwd would be re-raised instead of being converted to error strings. This is inconsistent with the documented behavior and differs from how CodexAgent handles the same exceptions.

Suggested change
return await self._process_query_stream(self._current_prompt, options)
return await self._process_query_stream(self._current_prompt, options)
except (FileNotFoundError, NotADirectoryError) as e:
# Convert validation errors (e.g., invalid cwd) to user-facing error strings,
# instead of letting them be re-raised as unexpected SDK errors.
return f"Error: {e}"

Copilot uses AI. Check for mistakes.
except Exception as e:
return self._handle_sdk_error(e)

async def run(self, request: Optional[str] = None, **kwargs) -> str:
"""Execute the agent's main loop asynchronously.

Args:
request: The task or prompt to execute
**kwargs: Temporary attribute overrides (max_turns, cwd, permission_mode, etc.)

Returns:
Result string from execution or error message

Note:
Attributes modified via kwargs are restored after execution on a "best effort" basis.
In rare cases, restoration may fail to avoid masking the primary execution error.
"""
if not request:
return "Error: No request provided"

self._current_prompt = request
self._messages = []
self._session_id = None
self._total_cost_usd = 0.0

# Whitelist of attributes that can be modified via kwargs
modifiable_attrs = {
'max_turns', 'cwd', 'permission_mode', 'allowed_tools',
'system_prompt_override', 'append_system_prompt'
}

# Safely modify attributes with validation
original_values = {}
for key, value in kwargs.items():
if key not in modifiable_attrs:
return f"Error: Attribute '{key}' cannot be modified via kwargs. Allowed: {sorted(modifiable_attrs)}"

if not hasattr(self, key):
return f"Error: Unknown attribute '{key}'"
Comment on lines +311 to +312
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as in CodexAgent - the error message for unknown attributes is misleading. It says "Unknown attribute" when the attribute exists but is not in the modifiable_attrs whitelist. The message should clarify that the attribute cannot be modified via kwargs.

Copilot uses AI. Check for mistakes.

try:
original_values[key] = getattr(self, key)
setattr(self, key, value)

# Validate critical attributes after modification
if key == 'cwd':
if isinstance(self.cwd, str):
self.cwd = Path(self.cwd)
self._validate_cwd()
elif key == 'permission_mode':
valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
if value not in valid_modes:
raise ValueError(f"Invalid permission_mode: {value}. Must be one of: {valid_modes}")
Comment on lines 319 to 348
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to CodexAgent, the cwd parameter can be set via kwargs without proper sanitization. A user could set cwd to sensitive directories and potentially access or modify system files. The security implications are especially concerning when combined with permissive permission_mode settings.

Suggested change
setattr(self, key, value)
# Validate critical attributes after modification
if key == 'cwd':
if isinstance(self.cwd, str):
self.cwd = Path(self.cwd)
self._validate_cwd()
elif key == 'permission_mode':
valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
if value not in valid_modes:
raise ValueError(f"Invalid permission_mode: {value}. Must be one of: {valid_modes}")
# Special handling and validation for cwd to avoid unsafe directories
if key == 'cwd':
# Normalize incoming cwd value to a Path
if isinstance(value, str):
new_cwd = Path(value)
elif isinstance(value, Path):
new_cwd = value
else:
raise ValueError("cwd must be a string or pathlib.Path")
# Disallow absolute paths to prevent pointing cwd to arbitrary sensitive locations
if new_cwd.is_absolute():
raise ValueError("cwd must be a relative path, absolute paths are not allowed")
# Prevent directory traversal via parent directory components
if ".." in new_cwd.parts:
raise ValueError("cwd cannot contain parent directory references ('..')")
# Resolve the new cwd relative to the current cwd
base_cwd = self.cwd
if isinstance(base_cwd, str):
base_cwd = Path(base_cwd)
resolved_cwd = (base_cwd / new_cwd).resolve()
self.cwd = resolved_cwd
self._validate_cwd()
else:
setattr(self, key, value)
# Validate critical attributes after modification
if key == 'permission_mode':
valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
if value not in valid_modes:
raise ValueError(f"Invalid permission_mode: {value}. Must be one of: {valid_modes}")

Copilot uses AI. Check for mistakes.

except Exception as e:
# If validation fails, restore any attributes set so far
for restore_key, restore_value in original_values.items():
try:
setattr(self, restore_key, restore_value)
except Exception:
pass # Best effort restoration
return f"Error: Failed to set attribute '{key}': {str(e)}"
Comment on lines +350 to +357
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error handling in run() method's kwargs restoration (lines 255-262) has the same issue as CodexAgent - it silently swallows all exceptions during restoration attempts. If setattr fails for any attribute, the agent could be left in an inconsistent state with some attributes modified and others not.

Copilot uses AI. Check for mistakes.

try:
options = self._create_options()
return await self._process_query_stream(request, options)
except Exception as e:
return self._handle_sdk_error(e)
finally:
# Restore original values (best effort)
for key, value in original_values.items():
try:
setattr(self, key, value)
except Exception:
pass # Ignore errors during restoration
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to CodexAgent, the run method modifies instance attributes without synchronization. If multiple coroutines call run() concurrently on the same ClaudeCodeAgent instance, they could interfere with each other's attribute modifications and restoration. Document that the agent is not safe for concurrent use or implement proper locking.

Copilot uses AI. Check for mistakes.

Comment on lines 359 to 376
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar to CodexAgent, the finally block silently ignores all exceptions during attribute restoration (lines 269-275). This means if restoration fails after successful execution, the agent will remain in a modified state, violating the contract that kwargs provide temporary overrides. This is the same issue as in CodexAgent.

Suggested change
try:
options = self._create_options()
return await self._process_query_stream(request, options)
except Exception as e:
return self._handle_sdk_error(e)
finally:
# Restore original values (best effort)
for key, value in original_values.items():
try:
setattr(self, key, value)
except Exception:
pass # Ignore errors during restoration
primary_exc: Optional[BaseException] = None
result: str
restoration_errors = []
try:
options = self._create_options()
result = await self._process_query_stream(request, options)
except Exception as e:
primary_exc = e
result = self._handle_sdk_error(e)
finally:
# Restore original values (best effort)
for key, value in original_values.items():
try:
setattr(self, key, value)
except Exception as restore_exc:
# Record restoration errors; they may be surfaced if there was no primary error
restoration_errors.append((key, restore_exc))
# If the main execution succeeded but restoration failed, report the restoration error
if primary_exc is None and restoration_errors:
failed_key, failed_exc = restoration_errors[0]
return f"Error: Failed to restore attribute '{failed_key}': {failed_exc}"
return result

Copilot uses AI. Check for mistakes.
async def __call__(self, **kwargs) -> str:
"""Execute the agent with given parameters."""
request = kwargs.pop('request', None) or kwargs.pop('task', None) or kwargs.pop('prompt', None)
Comment on lines +378 to +379
Copy link

Copilot AI Dec 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as CodexAgent - the call method accepts three different parameter names (request, task, prompt) which can lead to ambiguity. If multiple are provided, the precedence is undocumented. Consider standardizing on a single parameter name or clearly documenting the precedence order.

Suggested change
"""Execute the agent with given parameters."""
request = kwargs.pop('request', None) or kwargs.pop('task', None) or kwargs.pop('prompt', None)
"""Execute the agent with given parameters.
Accepts a single text argument under one of the aliases: 'request', 'task', or 'prompt'.
If more than one of these is provided, a ValueError is raised to avoid ambiguity.
"""
request = kwargs.pop("request", None)
task = kwargs.pop("task", None)
prompt = kwargs.pop("prompt", None)
# Ensure we don't silently ignore conflicting inputs
provided_count = sum(v is not None for v in (request, task, prompt))
if provided_count > 1:
raise ValueError(
"ClaudeCodeAgent.__call__ accepts only one of 'request', 'task', or 'prompt'. "
f"Received: request={request!r}, task={task!r}, prompt={prompt!r}"
)
# Preserve existing precedence: request > task > prompt
if request is None:
request = task if task is not None else prompt

Copilot uses AI. Check for mistakes.
return await self.run(request=request, **kwargs)

def get_session_info(self) -> Dict[str, Any]:
    """Return a summary of the current session.

    Returns:
        A dict with the session id, accumulated cost in USD, number of
        stored messages, working directory (as a string), turn limit and
        permission mode.
    """
    info: Dict[str, Any] = {}
    info["session_id"] = self._session_id
    info["total_cost_usd"] = self._total_cost_usd
    info["num_messages"] = len(self._messages)
    info["cwd"] = str(self.cwd)
    info["max_turns"] = self.max_turns
    info["permission_mode"] = self.permission_mode
    return info

def get_messages(self) -> List[Any]:
    """Return a shallow copy of the messages collected so far.

    The copy lets callers mutate the returned list without affecting the
    agent's internal message list.
    """
    snapshot = list(self._messages)
    return snapshot

def reset(self) -> None:
    """Clear all per-session state so the agent can start a new session.

    Drops the stored prompt and session id, zeroes the accumulated cost and
    replaces the message list with a fresh empty list.
    """
    self._current_prompt = None
    self._session_id = None
    self._total_cost_usd = 0.0
    self._messages = []

Loading