diff --git a/README.md b/README.md
index 1a41a38..0741981 100644
--- a/README.md
+++ b/README.md
@@ -80,7 +80,7 @@ We also generate multimodal skin based on the rules of the same maze. Generated
 - [ ] Add Multimodal Environment Generation Pipelines.
 - [ ] Add Three-stage Verification Pipeline for both text and multimodal environments.
 - [ ] Add Learning Experiments Scripts.
-- [ ] Add Coding Agents Option: Codex, Gemini Cli.
+- [x] Add Coding Agents Option: Codex, Claude Agent SDK.
 - [ ] Add 3D Environment Generation Pipelines.
 
 ## Repository Layout
@@ -129,6 +129,200 @@ python run_environment_skin_generation.py
 
 Cost summaries are automatically saved to `workspace/costs/`.
 
+## Coding Agents
+
+AutoEnv supports multiple coding agent backends for environment code generation and fixing. The code agent is used in the pipeline's CodeFixNode, LevelGenNode, and MaxRewardNode stages.
+
+### Backend Options
+
+| Backend   | Description                                                                  | Best For                            |
+| --------- | ---------------------------------------------------------------------------- | ----------------------------------- |
+| `miniswe` | Default [mini-swe-agent](https://github.com/SWE-agent/mini-swe-agent)        | General use, works with any LLM     |
+| `codex`   | OpenAI [Codex CLI](https://github.com/openai/codex)                          | OpenAI API users, fast execution    |
+| `claude`  | Anthropic [Claude Agent SDK](https://github.com/anthropics/claude-agent-sdk-python) | Anthropic API users, custom proxies |
+
+### Configuration
+
+Set the `code_agent_backend` field in `config/env_gen.yaml`:
+
+```yaml
+# Code agent backend: "miniswe" (default), "codex", "claude"
+code_agent_backend: "codex" # or "claude" or "miniswe"
+```
+
+### MiniSWE Agent (Default)
+
+The default agent using mini-swe-agent. Works with any LLM configured in `config/model_config.yaml`.
+
+```yaml
+# config/env_gen.yaml
+code_agent_backend: "miniswe"
+model: "gpt-4o" # or any configured LLM
+```
+
+No additional setup required beyond the standard model configuration.
+
+### Codex Agent
+
+Uses OpenAI's official Codex CLI for code generation.
+
+#### Prerequisites
+
+```bash
+# Install Codex CLI
+npm install -g @openai/codex
+# or
+brew install --cask codex
+
+# Authenticate (recommended)
+codex login
+
+# Verify installation
+codex whoami
+```
+
+#### Authentication Options
+
+**Option 1: CLI Login (Recommended)**
+
+```bash
+codex login  # Opens browser for OAuth
+```
+
+**Option 2: Environment Variable**
+
+```bash
+export OPENAI_API_KEY=your-api-key
+```
+
+**Option 3: Custom Base URL** (for proxies)
+
+```bash
+export OPENAI_API_KEY=your-api-key
+export OPENAI_BASE_URL=https://your-proxy.example.com/v1
+```
+
+#### Configuration
+
+```yaml
+# config/env_gen.yaml
+code_agent_backend: "codex"
+```
+
+### Claude Agent
+
+Uses Anthropic's Claude Agent SDK for code generation.
+
+#### Prerequisites
+
+```bash
+# Install Claude Agent SDK
+pip install claude-agent-sdk
+```
+
+The Python SDK authenticates via environment variables (see Authentication Options below).
+
+#### Authentication Options
+
+**Option 1: Environment Variables (Recommended)**
+
+```bash
+export ANTHROPIC_API_KEY=your-api-key
+```
+
+**Option 2: Custom Base URL** (for proxies)
+
+```bash
+export ANTHROPIC_API_KEY=your-api-key
+export ANTHROPIC_BASE_URL=https://your-proxy.example.com/api
+```
+
+#### Configuration
+
+```yaml
+# config/env_gen.yaml
+code_agent_backend: "claude"
+```
+
+### Generate Environment with Code Agent
+
+#### Quick Start
+
+```bash
+# 1. Copy example config
+cp config/env_gen_example.yaml config/env_gen.yaml
+
+# 2. Edit config (set code_agent_backend, theme, etc.)
+vim config/env_gen.yaml
+
+# 3. Run environment generation
+python run_environment_generation.py
+```
+
+#### Example Configuration
+
+```yaml
+# config/env_gen.yaml
+mode: "textual"
+model: "gpt-4o"
+concurrency: 1
+theme: "A strategic puzzle game with resource management"
+envs_root_path: "workspace/envs"
+code_agent_backend: "codex" # Use Codex for code generation
+```
+
+#### Background Execution (Recommended for Long Tasks)
+
+Code agents can take 10-30 minutes for complex environments. Run in background:
+
+```bash
+# Run in background with logging
+nohup python run_environment_generation.py > /tmp/autoenv_gen.log 2>&1 &
+
+# Monitor progress
+tail -f /tmp/autoenv_gen.log
+
+# Check if complete
+ls workspace/envs/*/done.txt
+```
+
+### Troubleshooting
+
+#### Codex CLI Issues
+
+```bash
+# Check if Codex is installed
+codex --version
+
+# Re-authenticate
+codex logout && codex login
+
+# Check current user
+codex whoami
+```
+
+#### Claude Agent Issues
+
+```bash
+# Check if Python SDK is installed
+pip show claude-agent-sdk
+
+# Verify API key
+echo $ANTHROPIC_API_KEY
+
+# Test import
+python -c "from claude_agent_sdk import query; print('SDK available')"
+```
+
+#### Timeout Issues
+
+For complex environments, increase the `timeout` passed to the agent constructor in `autoenv/coder.py` (`ECodeAgent._create_agent`):
+
+```python
+# Current default: 900 seconds (15 minutes)
+agent = CodexAgent(timeout=1200)  # 20 minutes
+```
+
 ## Benchmarking AutoEnv-36
 
 Evaluate agents on the 36 benchmark environments (scores for all; cost only for LLM branch). See `benchmarks/README.md` for details.
@@ -160,9 +354,10 @@ Programmatic APIs are available in `benchmarks/api.py` (`benchmark_llms`, `bench
 
 ## Acknowledgements
 
-Thanks to 
-[mini-swe-agent](https://github.com/SWE-agent/mini-swe-agent), 
-[codex](https://github.com/openai/codex), 
+Thanks to
+[mini-swe-agent](https://github.com/SWE-agent/mini-swe-agent),
+[codex](https://github.com/openai/codex),
+[claude-agent-sdk](https://github.com/anthropics/claude-agent-sdk-python),
 [rembg](https://github.com/danielgatis/rembg),
 for providing basic support for this project!
 
diff --git a/autoenv/claude_code_agent.py b/autoenv/claude_code_agent.py
new file mode 100644
index 0000000..b2943dd
--- /dev/null
+++ b/autoenv/claude_code_agent.py
@@ -0,0 +1,403 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field, model_validator, PrivateAttr
+
+from base.agent.base_agent import BaseAgent
+
+
+try:
+    from claude_agent_sdk import (
+        query,
+        ClaudeAgentOptions,
+        ClaudeSDKError,
+        CLINotFoundError,
+        ProcessError,
+        CLIJSONDecodeError,
+    )
+    CLAUDE_AGENT_AVAILABLE = True
+except ImportError:
+    CLAUDE_AGENT_AVAILABLE = False
+    # Fallback exception definitions that maintain proper inheritance
+    # These match the SDK's exception hierarchy for consistent error handling
+    class ClaudeSDKError(Exception):
+        """Base exception for Claude SDK errors."""
+        pass
+    
+    class CLINotFoundError(ClaudeSDKError):
+        """Exception raised when Claude CLI is not found."""
+        pass
+    
+    class ProcessError(ClaudeSDKError):
+        """Exception raised when process execution fails."""
+        def __init__(self, message="", exit_code=-1):
+            super().__init__(message)
+            self.exit_code = exit_code
+    
+    class CLIJSONDecodeError(ClaudeSDKError):
+        """Exception raised when JSON decoding fails."""
+        pass
+    
+    # Stub for query and ClaudeAgentOptions when SDK is unavailable
+    query = None
+    ClaudeAgentOptions = None
+
+
+class ClaudeCodeAgent(BaseAgent):
+    """Claude Code Agent for code generation using Claude Agent SDK."""
+    
+    name: str = Field(default="claude_code", description="Agent name")
+    description: str = Field(
+        default="Claude Code agent for code generation and execution",
+        description="Agent description"
+    )
+    
+    # Claude Code specific settings
+    # NOTE: Uses `max_turns` for conversation turns with Claude Code CLI (vs BaseAgent's generic `max_steps`)
+    max_turns: int = Field(default=10, description="Maximum conversation turns")
+    max_messages: int = Field(
+        default=1000,
+        description="Maximum number of messages to keep in history. Older messages are discarded."
+    )
+    cwd: Optional[Path] = Field(default=None, description="Working directory")
+    allowed_tools: Optional[List[str]] = Field(
+        default=None,
+        description="Allowed tools (e.g., ['Read', 'Write', 'Bash'])"
+    )
+    permission_mode: str = Field(
+        default="acceptEdits",
+        description=(
+            "Permission mode controlling agent's action execution behavior.\n"
+            "- 'default': Interactive mode, requests confirmation before actions\n"
+            "- 'acceptEdits': Auto-accepts code edits, still confirms dangerous operations\n"
+            "- 'bypassPermissions': ⚠️ DANGEROUS - Bypasses all permission checks and executes "
+            "actions without confirmation. Only use in fully trusted, isolated environments.\n"
+            "- 'plan': Planning mode, generates action plans without executing"
+        )
+    )
+    system_prompt_override: Optional[str] = Field(
+        default=None,
+        description="Override system prompt (only for non-interactive mode)"
+    )
+    append_system_prompt: Optional[str] = Field(
+        default=None,
+        description="Append to system prompt (only for non-interactive mode)"
+    )
+    api_base_url: Optional[str] = Field(
+        default=None,
+        description="Custom API base URL (e.g., for proxies or API gateways)"
+    )
+    api_key: Optional[str] = Field(
+        default=None,
+        description="API key for authentication. If not provided, uses ANTHROPIC_API_KEY env var"
+    )
+    env_vars: Optional[Dict[str, str]] = Field(
+        default=None,
+        description="Additional environment variables to pass to Claude CLI"
+    )
+    
+    class Config:
+        arbitrary_types_allowed = True
+
+    # Private attributes for internal state
+    _messages: List[Any] = PrivateAttr(default_factory=list)
+    _session_id: Optional[str] = PrivateAttr(default=None)
+    _total_cost_usd: float = PrivateAttr(default=0.0)
+    _current_prompt: Optional[str] = PrivateAttr(default=None)
+    _max_messages: int = PrivateAttr(default=1000)  # Limit message history to prevent unbounded growth
+
+    @model_validator(mode="after")
+    def validate_claude_agent_available(self) -> "ClaudeCodeAgent":
+        """Validate the freshly built agent.
+
+        Checks SDK availability, normalizes and verifies cwd, and validates permission_mode.
+        """
+        if not CLAUDE_AGENT_AVAILABLE:
+            raise ImportError(
+                "Claude Agent SDK is not installed. "
+                "Install it with: pip install claude-agent-sdk\n"
+                "Note: The Claude Code CLI is automatically bundled - no separate installation needed!"
+            )
+
+        # Fall back to the process working directory; otherwise coerce to Path
+        self.cwd = Path.cwd() if self.cwd is None else Path(self.cwd)
+
+        # The directory must already exist
+        self._validate_cwd()
+
+        # Reject unknown permission modes early
+        valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
+        if self.permission_mode in valid_modes:
+            return self
+
+        raise ValueError(
+            f"Invalid permission_mode: {self.permission_mode}. "
+            f"Must be one of: {valid_modes}"
+        )
+
+    def _validate_cwd(self) -> None:
+        """Ensure `self.cwd` refers to an existing directory, raising otherwise."""
+        if not self.cwd.is_dir():
+            if self.cwd.exists():
+                raise NotADirectoryError(f"Working directory path is not a directory: {self.cwd}")
+            raise FileNotFoundError(f"Working directory does not exist: {self.cwd}")
+
+    def _create_options(self) -> ClaudeAgentOptions:
+        """Build ClaudeAgentOptions from the agent's current settings.
+
+        Always sets max_turns, cwd and permission_mode; adds allowed_tools, system_prompt and env only when configured.
+        """
+        options_dict: Dict[str, Any] = {
+            "max_turns": self.max_turns,
+            "cwd": str(self.cwd),
+            "permission_mode": self.permission_mode,
+        }
+
+        if self.allowed_tools:
+            options_dict["allowed_tools"] = self.allowed_tools
+        if self.system_prompt_override:
+            options_dict["system_prompt"] = self.system_prompt_override
+        elif self.append_system_prompt:
+            # ClaudeAgentOptions has no `append_system_prompt` field; appended text is
+            # expressed as the `claude_code` preset plus an `append` entry instead.
+            options_dict["system_prompt"] = {
+                "type": "preset", "preset": "claude_code", "append": self.append_system_prompt,
+            }
+
+        # Environment for the Claude CLI: explicit api_key / api_base_url win over env_vars.
+        env_dict = dict(self.env_vars or {})
+        if self.api_key:
+            env_dict["ANTHROPIC_API_KEY"] = self.api_key
+        if self.api_base_url:
+            env_dict["ANTHROPIC_BASE_URL"] = self.api_base_url
+        if env_dict:
+            options_dict["env"] = env_dict
+
+        return ClaudeAgentOptions(**options_dict)
+
+    def _handle_sdk_error(self, e: Exception) -> str:
+        """Turn an expected Claude SDK failure into an "Error: ..." string.
+
+        SDK exceptions are mapped to user-facing messages, most specific type first.
+        Anything else is treated as a programming error and raised to the caller.
+
+        Args:
+            e: The exception caught around the SDK call.
+
+        Returns:
+            Formatted error message string for SDK errors.
+
+        Raises:
+            Exception: `e` itself, when it is not a ClaudeSDKError subclass.
+        """
+        if isinstance(e, CLINotFoundError):
+            return f"Error: Claude Code CLI not found: {str(e)}"
+        elif isinstance(e, ProcessError):
+            exit_code = getattr(e, 'exit_code', 'unknown')
+            return f"Error: Process failed with exit code {exit_code}: {str(e)}"
+        elif isinstance(e, CLIJSONDecodeError):
+            return f"Error: Failed to parse Claude response: {str(e)}"
+        elif isinstance(e, ClaudeSDKError):
+            return f"Error: Claude SDK error: {str(e)}"
+        else:
+            # Raise the given exception explicitly; a bare `raise` only works while an except block is active
+            raise e
+
+    async def _process_query_stream(self, prompt: str, options: ClaudeAgentOptions) -> str:
+        """Consume one query() stream and return the text of its result message.
+
+        Records every message in a bounded history, tracks the latest session id and
+        accumulates the cost reported by result messages. Shared by step() and run().
+
+        Args:
+            prompt: The prompt to send to Claude
+            options: Claude agent options
+
+        Returns:
+            Result text, a status/error description, or "No result received"
+        """
+        result_text = ""
+        async for message in query(prompt=prompt, options=options):
+            # Keep only the newest `max_messages` entries (the configurable public field)
+            self._messages.append(message)
+            overflow = len(self._messages) - max(self.max_messages, 0)
+            if overflow > 0:
+                del self._messages[:overflow]
+
+            if hasattr(message, 'session_id'):
+                self._session_id = message.session_id
+
+            # Check if this is a result message (by class name or type attribute)
+            is_result_message = (
+                type(message).__name__ == "ResultMessage" or
+                (hasattr(message, 'type') and message.type == "result")
+            )
+
+            if not is_result_message:
+                continue
+
+            # Track costs for result messages; a missing or None value counts as 0.0
+            # so that `float += None` can never abort the stream
+            self._total_cost_usd += getattr(message, 'total_cost_usd', None) or 0.0
+
+            # Capture result if available
+            if hasattr(message, 'result') and message.result:
+                result_text = message.result
+            # Handle error or completion subtypes
+            elif hasattr(message, 'subtype'):
+                if message.subtype == "error_max_turns":
+                    result_text = f"Error: Reached maximum turns ({self.max_turns})"
+                elif message.subtype == "error_during_execution":
+                    result_text = "Error: Execution failed"
+                elif message.subtype == "success":
+                    result_text = "Execution completed successfully"  # result is missing/empty here
+                else:
+                    result_text = f"Completed with status: {message.subtype}"
+            else:
+                result_text = "Execution completed"
+
+        return result_text if result_text else "No result received"
+
+    async def step(self) -> str:
+        """Run the stored prompt once and return its result or an error string."""
+        prompt = self._current_prompt
+        if not prompt:
+            return "No prompt provided. Use run() method to execute tasks."
+
+        try:
+            return await self._process_query_stream(prompt, self._create_options())
+        except Exception as exc:
+            return self._handle_sdk_error(exc)
+
+    async def run(self, request: Optional[str] = None, **kwargs) -> str:
+        """Execute one request, optionally with per-call attribute overrides.
+
+        Every override in kwargs is validated before any attribute is changed, so a
+        rejected call leaves the agent untouched. Accepted overrides are applied for
+        the duration of the query and restored afterwards.
+
+        Args:
+            request: The task or prompt to execute
+            **kwargs: Temporary attribute overrides (max_turns, cwd, permission_mode, etc.).
+                A `cwd` override must be a relative path without '..'; it is resolved
+                against the agent's current cwd.
+
+        Returns:
+            Result string from execution or error message
+
+        Warning:
+            This agent is NOT safe for concurrent use. Do not call run() from multiple
+            coroutines simultaneously on the same agent instance, as attribute modifications
+            will interfere with each other.
+
+        Note:
+            Attributes modified via kwargs are restored after execution on a "best effort" basis.
+            A failed restoration is logged rather than raised, so it cannot mask the result.
+        """
+        import logging  # function-scope import: only needed to report restoration failures
+        if not request:
+            return "Error: No request provided"
+
+        self._current_prompt = request
+        self._messages = []
+        self._session_id = None
+        self._total_cost_usd = 0.0
+
+        # Whitelist of attributes that can be modified via kwargs
+        modifiable_attrs = {
+            'max_turns', 'cwd', 'permission_mode', 'allowed_tools',
+            'system_prompt_override', 'append_system_prompt',
+            'api_base_url', 'api_key', 'env_vars'
+        }
+        valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
+
+        # Phase 1: validate every override up front. Nothing on the agent is modified
+        # here, so returning an error cannot leave earlier overrides applied.
+        overrides: Dict[str, Any] = {}
+        for key, value in kwargs.items():
+            if not hasattr(self, key):
+                return f"Error: Unknown attribute '{key}'"
+            if key not in modifiable_attrs:
+                return f"Error: Attribute '{key}' cannot be modified via kwargs. Allowed: {sorted(modifiable_attrs)}"
+
+            try:
+                if key == 'cwd':
+                    if not isinstance(value, (str, Path)):
+                        raise ValueError("cwd must be a string or pathlib.Path")
+                    new_cwd = Path(value)
+
+                    # Security: Prevent absolute paths to sensitive directories
+                    if new_cwd.is_absolute():
+                        raise ValueError(
+                            "For security reasons, cwd cannot be set to an absolute path via kwargs. "
+                            "Set cwd during agent initialization instead."
+                        )
+
+                    # Security: Prevent directory traversal via '..'
+                    if ".." in new_cwd.parts:
+                        raise ValueError("cwd cannot contain parent directory references ('..')")
+
+                    # Resolve against the agent's current (not yet overridden) cwd
+                    value = (Path(self.cwd) / new_cwd).resolve()
+                    if not value.exists():
+                        raise FileNotFoundError(f"Working directory does not exist: {value}")
+                    if not value.is_dir():
+                        raise NotADirectoryError(f"Working directory path is not a directory: {value}")
+                elif key == 'permission_mode' and value not in valid_modes:
+                    raise ValueError(f"Invalid permission_mode: {value}. Must be one of: {valid_modes}")
+            except Exception as e:
+                return f"Error: Failed to set attribute '{key}': {str(e)}"
+            overrides[key] = value
+
+        # Phase 2: apply the overrides, run the query, and always restore afterwards.
+        original_values: Dict[str, Any] = {}
+        try:
+            try:
+                for key, value in overrides.items():
+                    original_values[key] = getattr(self, key)
+                    setattr(self, key, value)
+            except Exception as e:
+                return f"Error: Failed to set attribute '{key}': {str(e)}"
+
+            try:
+                options = self._create_options()
+                return await self._process_query_stream(request, options)
+            except Exception as e:
+                return self._handle_sdk_error(e)
+        finally:
+            for key, value in original_values.items():
+                try:
+                    setattr(self, key, value)
+                except Exception as e:
+                    # Best effort: report the failure without replacing the run() result
+                    logging.getLogger(__name__).warning("Could not restore %s: %s", key, e)
+
+    async def __call__(self, **kwargs) -> str:
+        """Run the agent; the prompt may be given as `request`, `task` or `prompt` (first non-empty one wins)."""
+        candidates = [kwargs.pop(alias, None) for alias in ("request", "task", "prompt")]  # drop every alias from kwargs
+        return await self.run(request=next((text for text in candidates if text), None), **kwargs)
+
+    def get_session_info(self) -> Dict[str, Any]:
+        """Return a snapshot of the session id, accumulated cost, history size and key settings."""
+        settings = {"cwd": str(self.cwd), "max_turns": self.max_turns, "permission_mode": self.permission_mode}
+        # Session fields come first, followed by the configuration settings
+        return {
+            "session_id": self._session_id,
+            "total_cost_usd": self._total_cost_usd,
+            "num_messages": len(self._messages),
+            **settings,
+        }
+
+    def get_messages(self) -> List[Any]:
+        """Return a shallow copy of the recorded messages; the internal list is not exposed."""
+        return list(self._messages)
+
+    def reset(self) -> None:
+        """Clear the prompt, message history, session id and cost so the agent starts fresh."""
+        self._current_prompt = None
+        self._session_id = None
+        self._total_cost_usd = 0.0
+        self._messages = []
+
diff --git a/autoenv/coder.py b/autoenv/coder.py
index 061502d..be08949 100644
--- a/autoenv/coder.py
+++ b/autoenv/coder.py
@@ -1,51 +1,80 @@
-from typing import Optional
+from typing import Optional, Literal
 import os
 
 from base.agent.base_agent import BaseAgent
 from autoenv.miniswe_agent import MiniSWEAutoEnvAgent
 
-class ECodeAgent(BaseAgent):
-    """Agent that asks mini-swe-agent to generate levels inside Docker.
 
-    - Mounts the target env folder at /workspace.
-    - Mounts repo root at /repo and sets PYTHONPATH for imports.
-    - Uses a persistent pip cache to speed up runs.
-    - Instructs the assistant to fix scripts when needed, generate 100 levels to ./levels, validate, then finish.
+class ECodeAgent(BaseAgent):
+    """Agent for code generation tasks in AutoEnv.
+    
+    Supports multiple backends via Strategy Pattern:
+        - miniswe (default): Uses MiniSWEAutoEnvAgent, works with any LLM
+        - codex: Uses OpenAI Codex CLI (requires OPENAI_API_KEY)
+        - claude: Uses Claude Code CLI (requires ANTHROPIC_API_KEY)
+    
+    All backends implement the same BaseAgent.run(request=...) interface.
     """
 
     name: str = "coder"
     desc: str = "A minimal coder for AutoEnv-generated environments"
+    
+    # Backend selection: "miniswe" (default), "codex", or "claude"
+    backend: Literal["miniswe", "codex", "claude"] = "miniswe"
 
-    async def __call__(self, requirements: Optional[str] = None, cwds: Optional[str] = None, environment_type: Optional[str] = "local") -> str:
-        # Resolve paths
-        repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
-        base_env = {"PYTHONPATH": os.pathsep.join([repo_root, os.environ.get("PYTHONPATH", "")]).strip(os.pathsep)}
-
-        if environment_type == "docker":
-            agent = MiniSWEAutoEnvAgent(
-                llm=self.llm,  # Pass the LLM instance from BaseAgent
-                mode="yolo",
-                step_limit=50,
-                environment_type="docker",
-                cwd = cwds,
-                env = base_env,
-                timeout = 900,
-                docker_image="python:3.11-slim",
+    def _create_agent(self, cwds: str, environment_type: str = "local") -> BaseAgent:
+        """Factory method to create the appropriate agent based on backend."""
+        if self.backend == "codex":
+            from autoenv.codex_agent import CodexAgent
+            agent = CodexAgent(cwd=cwds, permission_mode="acceptEdits", timeout=900)  # 15 min timeout for complex tasks
+        
+        elif self.backend == "claude":
+            from autoenv.claude_code_agent import ClaudeCodeAgent
+            # Check environment variables for custom API configuration
+            api_base_url = os.environ.get("ANTHROPIC_BASE_URL")
+            api_key = os.environ.get("ANTHROPIC_API_KEY")
+            agent = ClaudeCodeAgent(
+                cwd=cwds,
+                permission_mode="bypassPermissions",  # Allow file operations without confirmation
+                api_base_url=api_base_url,
+                api_key=api_key,
+                max_turns=50  # Allow more turns for complex tasks
             )
-        elif environment_type == "local":
-            agent = MiniSWEAutoEnvAgent(
-                llm=self.llm,  # Pass the LLM instance from BaseAgent
-                mode="yolo",
-                step_limit=100,
-                environment_type="local",
-                cwd = cwds,
-                env = base_env,
-                timeout = 900,
-            )
-        else:
-            raise ValueError(f"Unsupported environment_type: {environment_type}")
+        
+        else:  # miniswe (default)
+            repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+            base_env = {"PYTHONPATH": os.pathsep.join([repo_root, os.environ.get("PYTHONPATH", "")]).strip(os.pathsep)}
+            
+            if environment_type == "docker":
+                agent = MiniSWEAutoEnvAgent(
+                    llm=self.llm,  # Pass the LLM instance from BaseAgent
+                    mode="yolo",
+                    step_limit=50,
+                    environment_type="docker",
+                    cwd = cwds,
+                    env = base_env,
+                    timeout = 900,
+                    docker_image="python:3.11-slim",
+                )
+            elif environment_type == "local":
+                agent = MiniSWEAutoEnvAgent(
+                    llm=self.llm,  # Pass the LLM instance from BaseAgent
+                    mode="yolo",
+                    step_limit=100,
+                    environment_type="local",
+                    cwd = cwds,
+                    env = base_env,
+                    timeout = 900,
+                )
+            else:
+                raise ValueError(f"Unsupported environment_type: {environment_type}")
+        
+        return agent
 
-        return await agent.run(task=requirements)
+    async def __call__(self, requirements: Optional[str] = None, cwds: Optional[str] = None, environment_type: Optional[str] = "local") -> str:
+        """Execute code task with configured backend."""
+        agent = self._create_agent(cwds, environment_type)
+        return await agent.run(request=requirements)
 
     # BaseAgent abstract methods
     async def step(self) -> str:
diff --git a/autoenv/codex_agent.py b/autoenv/codex_agent.py
new file mode 100644
index 0000000..267809f
--- /dev/null
+++ b/autoenv/codex_agent.py
@@ -0,0 +1,433 @@
+from __future__ import annotations
+
+import asyncio
+import json
+import os
+import subprocess
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from pydantic import Field, model_validator, PrivateAttr
+
+from base.agent.base_agent import BaseAgent
+
+
+# Check if Codex CLI is available
+def _check_codex_cli() -> bool:
+    """Probe `codex --version`; False if it is missing, not runnable, times out (5s) or exits non-zero."""
+    try:
+        result = subprocess.run(
+            ["codex", "--version"],
+            capture_output=True,
+            timeout=5
+        )
+        return result.returncode == 0
+    except (OSError, subprocess.TimeoutExpired):
+        return False
+
+
+# Cached result of the Codex CLI probe; None until is_codex_cli_available() first runs the check.
+CODEX_CLI_AVAILABLE: Optional[bool] = None
+
+
+def is_codex_cli_available(force_recheck: bool = False) -> bool:
+    """Report whether the Codex CLI can be run, probing at most once unless asked to re-probe.
+
+    Args:
+        force_recheck: When True, ignore the cached answer and probe the CLI again
+
+    Returns:
+        The cached (or freshly probed) availability flag
+    """
+    global CODEX_CLI_AVAILABLE
+    if force_recheck or CODEX_CLI_AVAILABLE is None:
+        CODEX_CLI_AVAILABLE = _check_codex_cli()
+    return CODEX_CLI_AVAILABLE
+
+
+class CodexAgent(BaseAgent):
+    """Codex agent for code generation using a Codex CLI tool."""
+
+    name: str = Field(default="codex", description="Agent name")
+    description: str = Field(
+        default="Codex agent for code generation and execution",
+        description="Agent description"
+    )
+
+    # Codex specific settings
+    # NOTE: `max_turns` is the Codex-side counterpart of BaseAgent's `max_steps`; `_build_cli_command` does not pass it to the CLI.
+    max_turns: int = Field(default=10, description="Maximum conversation turns")
+    cwd: Optional[Path] = Field(default=None, description="Working directory")
+    model: Optional[str] = Field(
+        default=None,
+        description="Model identifier for the Codex CLI. If not specified, uses the CLI's default model."
+    )
+    permission_mode: str = Field(
+        default="acceptEdits",
+        description=(
+            "Permission mode controlling agent's action execution behavior.\n"
+            "- 'default': Interactive mode, requests confirmation before actions\n"
+            "- 'acceptEdits': Auto-accepts code edits, still confirms dangerous operations\n"
+            "- 'bypassPermissions': ⚠️ DANGEROUS - Bypasses all permission checks and executes "
+            "actions without confirmation. Only use in fully trusted, isolated environments.\n"
+            "- 'plan': Planning mode, generates action plans without executing"
+        )
+    )
+    timeout: int = Field(
+        default=300,
+        description="Timeout in seconds for CLI commands"
+    )
+    api_key: Optional[str] = Field(
+        default=None,
+        description=(
+            "Optional API key for Codex CLI. If set, passed via OPENAI_API_KEY environment variable. "
+            "If not set, CLI uses its own login state (codex login)."
+        )
+    )
+
+    class Config:
+        arbitrary_types_allowed = True
+
+    # Private attributes for internal state
+    _messages: List[Any] = PrivateAttr(default_factory=list)
+    _session_id: Optional[str] = PrivateAttr(default=None)
+    _total_cost_usd: float = PrivateAttr(default=0.0)
+    _current_prompt: Optional[str] = PrivateAttr(default=None)
+
+    @model_validator(mode="after")
+    def validate_codex_cli_available(self) -> "CodexAgent":
+        """Check the CLI is installed, normalise and validate cwd, and validate permission_mode."""
+        # Force recheck to handle case where CLI was installed after module import
+        if not is_codex_cli_available(force_recheck=True):
+            raise ImportError(
+                "Codex CLI is not installed or not available on PATH. "
+                "Please install the 'codex' command-line tool required for this project "
+                "and ensure it is accessible in your PATH, then try again. "
+                "Refer to your project documentation for the correct installation instructions."
+            )
+
+        # Set default cwd if not provided
+        if self.cwd is None:
+            self.cwd = Path.cwd()
+        else:
+            self.cwd = Path(self.cwd)
+
+        # Validate working directory exists
+        self._validate_cwd()
+
+        # Validate permission mode
+        valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
+        if self.permission_mode not in valid_modes:
+            raise ValueError(
+                f"Invalid permission_mode: {self.permission_mode}. "
+                f"Must be one of: {valid_modes}"
+            )
+
+        return self
+
+    def _validate_cwd(self) -> None:
+        """Raise FileNotFoundError / NotADirectoryError unless self.cwd is an existing directory."""
+        if not self.cwd.exists():
+            raise FileNotFoundError(f"Working directory does not exist: {self.cwd}")
+        if not self.cwd.is_dir():
+            raise NotADirectoryError(f"Working directory path is not a directory: {self.cwd}")
+
+    def _build_cli_command(self, prompt: str) -> List[str]:
+        """Build the `codex exec` argv: model, cwd, sandbox for permission_mode, --json, then the prompt."""
+        cmd = ["codex", "exec"]
+
+        if self.model:
+            cmd.extend(["-m", self.model])
+        if self.cwd:
+            cmd.extend(["-C", str(self.cwd)])
+
+        sandbox_map = {
+            "default": "read-only",
+            "acceptEdits": "workspace-write",
+            "bypassPermissions": "danger-full-access",
+            "plan": "read-only"
+        }
+        cmd.extend(["-s", sandbox_map.get(self.permission_mode, "read-only")])
+
+        if self.permission_mode == "bypassPermissions":
+            cmd.append("--dangerously-bypass-approvals-and-sandbox")
+        elif self.permission_mode == "acceptEdits":
+            cmd.append("--full-auto")
+
+        cmd.append("--json")
+        # Use "--" so prompt content (even starting with "--") is treated as positional, not CLI flags
+        cmd.append("--")
+        cmd.append(prompt)
+
+        return cmd
+
+    async def _run_cli_command(self, prompt: str) -> Dict[str, Any]:
+        """Run `codex exec` on *prompt* and fold its JSONL event stream into a result dict."""
+        # Validate cwd before use (handles case where cwd was modified via kwargs)
+        self._validate_cwd()
+
+        cmd = self._build_cli_command(prompt)
+
+        env = os.environ.copy()
+        if self.api_key:
+            env["OPENAI_API_KEY"] = self.api_key
+
+        process = await asyncio.create_subprocess_exec(
+            *cmd,
+            stdout=asyncio.subprocess.PIPE,
+            stderr=asyncio.subprocess.PIPE,
+            cwd=str(self.cwd),
+            env=env
+        )
+
+        try:
+            stdout, stderr = await asyncio.wait_for(
+                process.communicate(),
+                timeout=self.timeout
+            )
+        except asyncio.TimeoutError:
+            process.kill()
+            await process.wait()
+            raise TimeoutError(
+                f"Codex CLI command timed out after {self.timeout} seconds"
+            )
+
+        if process.returncode != 0:
+            error_msg = stderr.decode('utf-8', errors='replace') if stderr else "Unknown error"
+            raise RuntimeError(
+                f"Codex CLI command failed with code {process.returncode}: {error_msg}"
+            )
+
+        output = stdout.decode('utf-8', errors='replace').strip()
+
+        if not output:
+            return {"result": "No output received", "session_id": None}
+
+        try:
+            lines = output.split('\n')
+            result = {"result": ""}
+            thread_id = None
+
+            for line in lines:
+                if not line.strip():
+                    continue
+
+                try:
+                    obj = json.loads(line)
+                except json.JSONDecodeError:
+                    # Skip invalid JSON lines (e.g., empty/partial) and continue parsing the stream
+                    continue
+                if not isinstance(obj, dict):
+                    # Valid JSON that is not an object (e.g. a bare number) is not an event; skip it
+                    continue
+
+                event_type = obj.get("type")
+                if event_type == "thread.started":
+                    thread_id = obj.get("thread_id")
+                elif event_type == "item.completed":
+                    item = obj.get("item") or {}
+                    # Agent messages are concatenated, separated by blank lines
+                    if item.get("type") == "agent_message" and item.get("text"):
+                        if result["result"]:
+                            result["result"] += "\n\n"
+                        result["result"] += item["text"]
+                elif event_type == "turn.completed" and obj.get("usage"):
+                    result["usage"] = obj["usage"]
+
+            if thread_id:
+                result["session_id"] = thread_id
+
+            if result["result"]:
+                result["result"] = result["result"].strip()
+            else:
+                result["result"] = "No result received"
+
+            return result
+
+        except Exception as e:
+            raise ValueError(
+                f"Failed to parse Codex CLI output as JSON: {e}. Raw output: {output!r}"
+            ) from e
+
+    def _process_result(self, result: Dict[str, Any]) -> str:
+        """Process CLI command result and update internal state.
+
+        Args:
+            result: Result dictionary from _run_cli_command
+
+        Returns:
+            Formatted result string
+        """
+        # Update session info
+        if 'session_id' in result:
+            self._session_id = result['session_id']
+        if 'total_cost_usd' in result:
+            self._total_cost_usd = result['total_cost_usd']
+
+        # Append to message history
+        self._messages.append(result)
+
+        # Parse result based on type and subtype
+        if result.get('type') == 'result':
+            subtype = result.get('subtype', 'success')
+            if subtype == 'success':
+                return result.get('result', 'Execution completed')
+            elif subtype == 'error_max_turns':
+                return f"Error: Reached maximum turns ({self.max_turns})"
+            elif subtype == 'error_during_execution':
+                return "Error: Execution failed"
+            else:
+                return f"Completed with status: {subtype}"
+
+        return result.get('result', 'No result received')
+
+    async def step(self) -> str:
+        """Execute a single step: run the Codex CLI once on the prompt stored by run().
+
+        Returns the processed result, or an error string for known CLI failures;
+        unexpected exceptions propagate to the caller.
+        """
+        if not self._current_prompt:
+            return "No prompt provided. Use run() method to execute tasks."
+
+        try:
+            result = await self._run_cli_command(self._current_prompt)
+            return self._process_result(result)
+        except (TimeoutError, RuntimeError, ValueError, FileNotFoundError, NotADirectoryError) as e:
+            return f"Error during execution: {str(e)}"
+
+    def _validated_override(self, key: str, value: Any) -> Any:
+        """Check a run() override and return the value to assign.
+
+        Raises ValueError, FileNotFoundError or NotADirectoryError when the override is unacceptable.
+        """
+        if key == 'cwd':
+            if not isinstance(value, (str, Path)):
+                raise ValueError("cwd must be a string or pathlib.Path")
+            new_cwd = Path(value)
+            # Security: only allow descending from the configured cwd
+            if new_cwd.is_absolute():
+                raise ValueError(
+                    "For security reasons, cwd cannot be set to an absolute path via kwargs. "
+                    "Set cwd during agent initialization instead."
+                )
+            if ".." in new_cwd.parts:
+                raise ValueError("cwd cannot contain parent directory references ('..')")
+            # Resolve against the cwd in effect *before* the override is applied
+            resolved = (Path(self.cwd) / new_cwd).resolve()
+            if not resolved.exists():
+                raise FileNotFoundError(f"Working directory does not exist: {resolved}")
+            if not resolved.is_dir():
+                raise NotADirectoryError(f"Working directory path is not a directory: {resolved}")
+            return resolved
+        if key == 'permission_mode':
+            valid_modes = ["default", "acceptEdits", "bypassPermissions", "plan"]
+            if value not in valid_modes:
+                raise ValueError(f"Invalid permission_mode: {value}. Must be one of: {valid_modes}")
+        return value
+
+    def _restore_attrs(self, saved: Dict[str, Any]) -> None:
+        """Put back attribute values captured before run() applied its overrides (best effort)."""
+        for key, value in saved.items():
+            try:
+                setattr(self, key, value)
+            except Exception:
+                # Deliberate best effort: a failed restore must not mask the run's own result
+                pass
+
+    async def run(self, request: Optional[str] = None, **kwargs) -> str:
+        """Execute one Codex CLI task and return its result (or an error string).
+
+        Args:
+            request: The task or prompt to execute
+            **kwargs: Temporary attribute overrides. Allowed keys: max_turns, timeout, cwd,
+                model, permission_mode. A cwd override must be a relative path without '..';
+                it is resolved against the agent's current cwd.
+
+        Returns:
+            Result string from execution. Invalid input, rejected overrides and known CLI
+            failures (timeout, non-zero exit, unparseable output) are returned as strings
+            starting with "Error" rather than raised.
+
+        Warning:
+            This agent is NOT safe for concurrent use. Do not call run() from multiple
+            coroutines simultaneously on the same agent instance, as attribute modifications
+            will interfere with each other.
+
+        Note:
+            Every override is validated before any attribute is changed, so a rejected
+            override leaves the agent's configuration untouched. Applied overrides are
+            restored afterwards on a best-effort basis.
+        """
+        if not request:
+            return "Error: No request provided"
+
+        self._current_prompt = request
+        self._messages = []
+        self._session_id = None
+        self._total_cost_usd = 0.0
+
+        # Whitelist of attributes that can be modified via kwargs
+        modifiable_attrs = {
+            'max_turns', 'timeout', 'cwd', 'model', 'permission_mode'
+        }
+
+        # Phase 1: validate everything up front, without touching the agent
+        overrides = {}
+        for key, value in kwargs.items():
+            if key not in modifiable_attrs:
+                return f"Error: Attribute '{key}' cannot be modified via kwargs. Allowed: {sorted(modifiable_attrs)}"
+            try:
+                overrides[key] = self._validated_override(key, value)
+            except (ValueError, OSError, RuntimeError) as e:
+                return f"Error: Failed to set attribute '{key}': {str(e)}"
+
+        # Phase 2: apply the overrides, remembering what to put back
+        original_values = {}
+        for key, value in overrides.items():
+            # Assignment itself may be rejected by the model; roll back what was applied so far
+            try:
+                original_values[key] = getattr(self, key)
+                setattr(self, key, value)
+            except Exception as e:
+                self._restore_attrs(original_values)
+                return f"Error: Failed to set attribute '{key}': {str(e)}"
+
+        try:
+            result = await self._run_cli_command(request)
+            return self._process_result(result)
+        except (TimeoutError, RuntimeError, ValueError, FileNotFoundError, NotADirectoryError) as e:
+            # Known CLI execution errors - return as error strings
+            return f"Error during execution: {str(e)}"
+        finally:
+            # Unexpected exceptions propagate; the overrides are undone either way
+            self._restore_attrs(original_values)
+
+    async def __call__(self, **kwargs) -> str:
+        """Run the task given as `request`, `task` or `prompt` (first non-empty wins); other kwargs go to run()."""
+        aliases = [kwargs.pop(alias, None) for alias in ('request', 'task', 'prompt')]
+        return await self.run(request=next((a for a in aliases if a), None), **kwargs)
+
+    def get_session_info(self) -> Dict[str, Any]:
+        """Get information about the current session."""
+        return {
+            "session_id": self._session_id,
+            "total_cost_usd": self._total_cost_usd,
+            "num_messages": len(self._messages),
+            "cwd": str(self.cwd),
+            "model": self.model,
+            "max_turns": self.max_turns,
+            "permission_mode": self.permission_mode,
+        }
+
+    def get_messages(self) -> List[Any]:
+        """Get all messages from the current session."""
+        return self._messages.copy()
+
+    def reset(self) -> None:
+        """Reset the agent state for a new session."""
+        self._messages = []
+        self._session_id = None
+        self._total_cost_usd = 0.0
+        self._current_prompt = None
+
diff --git a/autoenv/pipeline/generator/nodes.py b/autoenv/pipeline/generator/nodes.py
index 1c9217b..a1fbc70 100644
--- a/autoenv/pipeline/generator/nodes.py
+++ b/autoenv/pipeline/generator/nodes.py
@@ -70,6 +70,9 @@ class GeneratorContext(NodeContext):
     level_gen_result: Any = None
     max_reward_result: Any = None
 
+    # Code agent backend: "miniswe" (default), "codex", or "claude"
+    code_agent_backend: str = "miniswe"
+
     # Status
     success: bool = False
     error: str | None = None
@@ -104,7 +107,7 @@ def _init_env_folder(self, ctx: GeneratorContext) -> None:
             local_time = time.localtime(t)
             ctx.env_id = time.strftime("%Y%m%d_%H%M%S", local_time) + f"_env_{ctx.env_theme}"
         if not ctx.env_folder_path:
-            ctx.env_folder_path = ctx.envs_root_path / ctx.env_id
+            ctx.env_folder_path = (ctx.envs_root_path / ctx.env_id).resolve()  # Use absolute path
         ctx.env_folder_path.mkdir(parents=True, exist_ok=True)
 
 
@@ -216,7 +219,7 @@ async def execute(self, ctx: GeneratorContext) -> None:
             ctx.error = "CodeFixNode requires env_folder_path"
             return
 
-        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config))
+        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config), backend=ctx.code_agent_backend)
 
         task = ECODE_AGENT_CODE_FIX_PROMPT.format(
             env_id=ctx.env_id,
@@ -224,7 +227,13 @@ async def execute(self, ctx: GeneratorContext) -> None:
             validator_checklist=VALIDATOR_CHECKLIST,
         )
         ctx.code_fix_result = await code_agent(requirements=task, cwds=str(ctx.env_folder_path))
-        print(f"[CodeFixNode] ✓ code fix completed")
+        
+        # Check for errors in result (str() guards against a non-string agent result)
+        if ctx.code_fix_result and str(ctx.code_fix_result).startswith("Error"):
+            print(f"[CodeFixNode] ⚠️ WARNING: {str(ctx.code_fix_result)[:200]}...")
+            print(f"[CodeFixNode] Continuing despite error...")
+        else:
+            print(f"[CodeFixNode] ✓ code fix completed (backend={ctx.code_agent_backend})")
 
 
 class LevelGenNode(BaseNode):
@@ -239,7 +248,7 @@ async def execute(self, ctx: GeneratorContext) -> None:
             ctx.error = "LevelGenNode requires env_folder_path"
             return
 
-        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config))
+        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config), backend=ctx.code_agent_backend)
 
         task = ECODE_AGENT_LEVEL_GENERATION_PROMPT.format(
             env_id=ctx.env_id,
@@ -247,7 +256,24 @@ async def execute(self, ctx: GeneratorContext) -> None:
             validator_checklist=VALIDATOR_CHECKLIST,
         )
         ctx.level_gen_result = await code_agent(requirements=task, cwds=str(ctx.env_folder_path))
-        print(f"[LevelGenNode] ✓ level generation completed")
+        
+        # Check for errors in result
+        if ctx.level_gen_result and str(ctx.level_gen_result).startswith("Error"):
+            print(f"[LevelGenNode] ⚠️ WARNING: {str(ctx.level_gen_result)[:200]}...")
+            print(f"[LevelGenNode] Continuing despite error...")
+        else:
+            # Verify levels directory was created with level files
+            levels_dir = ctx.env_folder_path / "levels"
+            if levels_dir.exists():
+                level_files = list(levels_dir.glob("*.yaml"))
+                if len(level_files) >= 1:
+                    print(f"[LevelGenNode] ✓ level generation completed - {len(level_files)} levels created")
+                    if len(level_files) < 15:
+                        print(f"[LevelGenNode] ⚠️ Note: Expected 15 levels, got {len(level_files)}")
+                else:
+                    print(f"[LevelGenNode] ⚠️ WARNING: levels/ directory exists but contains no .yaml files!")
+            else:
+                print(f"[LevelGenNode] ⚠️ WARNING: levels/ directory NOT created!")
 
 
 class MaxRewardNode(BaseNode):
@@ -262,14 +288,25 @@ async def execute(self, ctx: GeneratorContext) -> None:
             ctx.error = "MaxRewardNode requires env_folder_path"
             return
 
-        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config))
+        code_agent = ECodeAgent(llm=AsyncLLM(self.llm.config), backend=ctx.code_agent_backend)
 
         task = ECODE_AGENT_CALCULATE_MAX_REWARD_PROMPT.format(
             env_id=ctx.env_id,
             workspace=ctx.env_folder_path,
         )
         ctx.max_reward_result = await code_agent(requirements=task, cwds=str(ctx.env_folder_path))
-        print(f"[MaxRewardNode] ✓ max reward calculation completed")
+        
+        # Check for errors in result (max_reward_result is typed Any, so compare on its string form)
+        if ctx.max_reward_result and str(ctx.max_reward_result).startswith("Error"):
+            print(f"[MaxRewardNode] ⚠️ WARNING: {str(ctx.max_reward_result)[:200]}...")
+            print(f"[MaxRewardNode] Continuing despite error...")
+        else:
+            # Verify level_max_rewards.json was created
+            rewards_file = ctx.env_folder_path / "level_max_rewards.json"
+            if rewards_file.exists():
+                print(f"[MaxRewardNode] ✓ max reward calculation completed - level_max_rewards.json created")
+            else:
+                print(f"[MaxRewardNode] ⚠️ WARNING: level_max_rewards.json NOT created!")
 
 
 class ArchiveNode(BaseNode):
diff --git a/autoenv/pipeline/generator/pipeline.py b/autoenv/pipeline/generator/pipeline.py
index 987a63e..8b11bca 100644
--- a/autoenv/pipeline/generator/pipeline.py
+++ b/autoenv/pipeline/generator/pipeline.py
@@ -70,6 +70,7 @@ async def run(
         requirements: str,
         output_dir: Path | str | None = None,
         env_theme: str = "random",
+        code_agent_backend: str = "miniswe",
     ) -> GeneratorContext:
         """
         Run generation pipeline.
@@ -78,6 +79,7 @@ async def run(
             requirements: Environment requirements (string or .txt file path)
             output_dir: Output root directory, defaults to workspace/envs
             env_theme: Environment theme name
+            code_agent_backend: Code agent backend ("miniswe", "codex", "claude")
 
         Returns:
             GeneratorContext: Context containing generation results
@@ -92,6 +94,7 @@ async def run(
         ctx = GeneratorContext(
             requirements=requirements,
             env_theme=env_theme,
+            code_agent_backend=code_agent_backend,
         )
         if output_dir:
             ctx.envs_root_path = Path(output_dir)
diff --git a/config/env_gen_example.yaml b/config/env_gen_example.yaml
index 491498f..6a436cf 100644
--- a/config/env_gen_example.yaml
+++ b/config/env_gen_example.yaml
@@ -19,3 +19,11 @@ theme: "<inline_text>"
 # Output directory for generated environments
 envs_root_path: "workspace/envs"
 
+# Code agent backend: "miniswe" (default), "codex", or "claude"
+# - miniswe: Uses MiniSWE agent with LLM (recommended, works with any LLM)
+# - codex: Uses OpenAI Codex CLI (auth via OPENAI_API_KEY, or an existing `codex login` session)
+# - claude: Uses Claude Agent SDK (Python package)
+#   • Required: Environment variable ANTHROPIC_API_KEY
+#   • Optional: Custom base URL ANTHROPIC_BASE_URL (set when using proxy)
+#   • Recommended: Write vars to .env and `source .env` in shell
+code_agent_backend: "miniswe"
diff --git a/run_environment_generation.py b/run_environment_generation.py
index a683560..d9d46fd 100644
--- a/run_environment_generation.py
+++ b/run_environment_generation.py
@@ -38,6 +38,7 @@ async def run_generation(
     output: str,
     mode: str = "textual",
     image_model: str | None = None,
+    code_agent_backend: str = "miniswe",
 ):
     """Run a single generation task."""
     label = theme
@@ -46,9 +47,9 @@ async def run_generation(
         theme = Path(theme).read_text(encoding="utf-8")
 
     # Step 1: Run generator pipeline
-    print(f"🚀 [{label}] Generating environment...")
+    print(f"🚀 [{label}] Generating environment (backend={code_agent_backend})...")
     gen_pipeline = GeneratorPipeline.create_default(llm_name=model)
-    gen_ctx = await gen_pipeline.run(requirements=theme, output_dir=output)
+    gen_ctx = await gen_pipeline.run(requirements=theme, output_dir=output, code_agent_backend=code_agent_backend)
 
     if not gen_ctx.success:
         print(f"❌ [{label}] Generation failed: {gen_ctx.error}")
@@ -97,6 +98,7 @@ async def main():
     mode = args.mode or cfg.get("mode") or "textual"
     image_model = cfg.get("image_model")
     concurrency = cfg.get("concurrency", 1)
+    code_agent_backend = cfg.get("code_agent_backend", "miniswe")
 
     Path(output).mkdir(parents=True, exist_ok=True)
     print(f"🔧 Config: {args.config}")
@@ -104,6 +106,7 @@ async def main():
     print(f"🎨 Image Model: {image_model}")
     print(f"📁 Output: {output}")
     print(f"📦 Mode: {mode}")
+    print(f"🔧 Code Agent: {code_agent_backend}")
 
     # Determine themes (priority: CLI --theme > themes_folder > theme)
     themes: list[str] = []
@@ -123,7 +126,7 @@ async def main():
 
     async def task(t: str):
         async with sem:
-            await run_generation(t, model, output, mode, image_model)
+            await run_generation(t, model, output, mode, image_model, code_agent_backend)
 
     with CostMonitor() as monitor:
         await asyncio.gather(*[task(t) for t in themes])