dbt-labs · clkao · Nov 3, 2025 · Nov 3, 2025 · Nov 3, 2025 · Nov 3, 2025
diff --git a/ade_bench/agents/installed_agents/abstract_installed_agent.py b/ade_bench/agents/installed_agents/abstract_installed_agent.py
@@ -26,10 +26,9 @@
 class AbstractInstalledAgent(BaseAgent, ABC):
     NAME = AgentName.ABSTRACT_INSTALLED
 
-    def __init__(self, use_mcp: bool = False, model_name: str | None = None, **kwargs):
+    def __init__(self, model_name: str | None = None, **kwargs):
         super().__init__(**kwargs)
         self._variant_config = {}
-        self._use_mcp = use_mcp
         self._model_name = model_name
 
     @property
@@ -109,27 +108,6 @@ def perform_task(
                 max_timeout_sec=config.setup_timeout_sec,  # Use setup timeout for installation
             )
 
-            # Optionally setup dbt MCP server
-            if self._use_mcp:
-                dbt_mcp_script = Path(__file__).parent.parent.parent.parent / "shared" / "scripts" / "setup-dbt-mcp.sh"
-                session.copy_to_container(
-                    dbt_mcp_script,
-                    container_dir="/scripts",
-                    container_filename="setup-dbt-mcp.sh",
-                )
-
-                # Pass db_type, project_type, and agent name
-                db_type = self._variant_config.get('db_type', 'unknown')
-                project_type = self._variant_config.get('project_type', 'unknown')
-                agent_name = self.NAME.value if hasattr(self.NAME, 'value') else str(self.NAME)
-                session.send_keys(
-                    [
-                        f"bash /scripts/setup-dbt-mcp.sh {db_type} {project_type} {agent_name}",
-                        "Enter",
-                    ],
-                    block=True,
-                    max_timeout_sec=config.setup_timeout_sec,
-                )
         except TimeoutError:
             log_harness_info(
                 logger,

diff --git a/ade_bench/agents/installed_agents/claude_code/claude_code_agent.py b/ade_bench/agents/installed_agents/claude_code/claude_code_agent.py
@@ -14,7 +14,7 @@
 
 class ClaudeCodeAgent(AbstractInstalledAgent):
     NAME = AgentName.CLAUDE_CODE
-    ALLOWED_TOOLS = ["Bash", "Edit", "Write", "NotebookEdit", "WebFetch", "mcp__dbt"]
+    ALLOWED_TOOLS = ["Bash", "Edit", "Glob", "Grep", "Write", "NotebookEdit", "WebFetch", "Skill", "mcp__dbt"]
 
     def __init__(self, **kwargs):
         super().__init__(**kwargs)

diff --git a/ade_bench/cli/ab/main.py b/ade_bench/cli/ab/main.py
@@ -116,15 +116,15 @@ def run(
         "--log-level",
         help="Set the logging level"
     ),
-    use_mcp: bool = typer.Option(
-        False,
-        "--use-mcp",
-        help="Enable MCP (Model Context Protocol) for the agent"
-    ),
     with_profiling: bool = typer.Option(
         False,
         "--with-profiling",
         help="Run the harness with a python profiler",
+    ),
+    plugins: str = typer.Option(
+        "",
+        "--plugins",
+        help="Comma-separated list of plugins to enable (e.g., 'superpowers,dbt-mcp')"
     )
 ):
     """
@@ -167,6 +167,9 @@ def run(
     elif agent_args:
         agent_kwargs["additional_args"] = agent_args
 
+    # Parse plugins
+    enabled_plugins = [p.strip() for p in plugins.split(",") if p.strip()]
+
     # Create and run the harness
     harness = Harness(
         dataset_path=dataset_path,
@@ -189,8 +192,8 @@ def run(
         db_type=db,
         project_type=project_type,
         keep_alive=persist,
-        use_mcp=use_mcp,
-        with_profiling=with_profiling
+        with_profiling=with_profiling,
+        enabled_plugins=enabled_plugins,
     )
 
     results = harness.run()

diff --git a/ade_bench/harness.py b/ade_bench/harness.py
@@ -28,6 +28,7 @@
     TrialResults,
 )
 from ade_bench.setup.setup_orchestrator import SetupOrchestrator
+from ade_bench.plugins.base_plugin import PluginContext
 from ade_bench.llms.base_llm import ContextLengthExceededError, ParseError
 from ade_bench.parsers.base_parser import UnitTestStatus, ParserResult
 from ade_bench.terminal.docker_compose_manager import DockerComposeManager
@@ -64,8 +65,8 @@ def __init__(
         db_type: str | None = None,
         project_type: str | None = None,
         keep_alive: bool = False,
-        use_mcp: bool = False,
         with_profiling: bool = False,
+        enabled_plugins: list[str] | None = None,
     ):
         """
         Runs the Terminal-Bench harness.
@@ -93,8 +94,8 @@ def __init__(
             db_type: Database type to filter variants (e.g., duckdb, postgres, sqlite, snowflake).
             project_type: Project type to filter variants (e.g., dbt, other).
             keep_alive: If True, keep containers alive when tasks fail for debugging.
-            use_mcp: If True, start a dbt MCP server after setup completes.
             with_profiling: If True, will enable the cProfiler.
+            enabled_plugins: List of plugin names to enable (from CLI --plugins).
         """
         self._run_uuid = None
         self._start_time = datetime.now(timezone.utc).isoformat()
@@ -107,11 +108,14 @@ def __init__(
         self._db_filter = db_type
         self._project_type_filter = project_type
         self._keep_alive = keep_alive
-        self._use_mcp = use_mcp
         self._with_profiling = with_profiling
+        self._enabled_plugins = enabled_plugins
 
         # Initialize setup orchestrator for variant-specific setup
-        self._setup_orchestrator = SetupOrchestrator()
+        self._setup_orchestrator = SetupOrchestrator(enabled_plugins=enabled_plugins)
+
+        # Keep reference to registry for pre-agent/post-trial hooks
+        self.plugin_registry = self._setup_orchestrator.plugin_registry
 
         self._output_path = output_path
         self._agent_name = agent_name
@@ -180,9 +184,6 @@ def _create_agent_for_task(self, task_id: str) -> BaseAgent:
         if self._model_name:
             agent_kwargs["model_name"] = self._model_name
 
-        # Pass use_mcp flag to installed agents
-        agent_kwargs["use_mcp"] = self._use_mcp
-
         return AgentFactory.get_agent(self._agent_name, **agent_kwargs)
 
     def _init_dataset(self) -> None:
@@ -522,7 +523,8 @@ def _run_setup(
                 terminal=terminal,
                 session=session,
                 file_diff_handler=file_diff_handler,
-                trial_handler=trial_handler
+                trial_handler=trial_handler,
+                enabled_plugins=self._enabled_plugins
             )
 
             # Run setup with timeout using asyncio
@@ -569,7 +571,7 @@ def _run_trial(
             model_name=self._model_name,
             db_type=config.get("db_type"),
             project_type=config.get("project_type"),
-            used_mcp=self._use_mcp,
+            used_mcp=self.plugin_registry.did_plugin_run("dbt-mcp"),
         )
 
         with spin_up_terminal(
@@ -651,6 +653,21 @@ def _run_trial(
             if hasattr(task_agent, 'set_variant_config'):
                 task_agent.set_variant_config(config)
 
+            # Build context for agent-phase hooks
+            context = PluginContext(
+                terminal=terminal,
+                session=session,
+                trial_handler=trial_handler,
+                task_id=trial_handler.task_id,
+                variant=config,
+                agent_name=task_agent.NAME,
+                db_type=config.get("database", {}).get("type") if config.get("database") else config.get("db_type"),
+                project_type=config.get("project_type"),
+            )
+
+            # PRE-AGENT HOOKS
+            self.plugin_registry.run_hooks("pre_agent", context)
+
             log_harness_info(self._logger, trial_handler.task_id, "agent", f"Starting agent...")
             try:
                 agent_result, agent_failure_mode = self._run_agent(
@@ -716,6 +733,9 @@ def _run_trial(
                 results.runtime_ms = agent_result.runtime_ms
                 results.cost_usd = agent_result.cost_usd
 
+            # POST-TRIAL HOOKS
+            self.plugin_registry.run_hooks("post_trial", context)
+
             # Always kill the agent session to ensure cleanup, regardless of success/failure
             try:
                 session.kill_session()
@@ -1138,7 +1158,7 @@ def _execute_single_trial(
                 model_name=self._model_name,
                 db_type=config.get("db_type"),
                 project_type=config.get("project_type"),
-                used_mcp=self._use_mcp,
+                used_mcp=self.plugin_registry.did_plugin_run("dbt-mcp"),
             )
             return trial_results
 

diff --git a/ade_bench/plugins/__init__.py b/ade_bench/plugins/__init__.py
diff --git a/ade_bench/plugins/base_plugin.py b/ade_bench/plugins/base_plugin.py
@@ -0,0 +1,67 @@
+# ABOUTME: Base plugin interface and context for ADE-Bench plugin system
+# ABOUTME: Plugins hook into setup/agent lifecycle phases with conditional execution
+
+from abc import ABC
+from dataclasses import dataclass
+from typing import Any, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from ade_bench.agents.agent_name import AgentName
+    from ade_bench.terminal.docker_compose_manager import DockerComposeManager
+    from ade_bench.handlers.trial_handler import TrialHandler
+
+
+@dataclass
+class PluginContext:
+    """Context passed to plugin hooks (the dataclass is not frozen, so read-only use is by convention)"""
+    terminal: "DockerComposeManager"  # plugins use it to copy files into the container
+    session: Any  # libtmux Session; hooks send shell commands through send_keys()
+    trial_handler: "TrialHandler"  # handler for the current trial
+    task_id: str  # id of the task this trial runs
+    variant: dict  # variant config dict of the task
+    agent_name: Optional["AgentName"] = None  # agent's NAME; None when not supplied
+    db_type: Optional[str] = None  # e.g. "snowflake"
+    project_type: Optional[str] = None  # e.g. "dbt" or "dbt-fusion"
+
+
+class BasePlugin(ABC):
+    """Base class for ADE-Bench plugins
+
+    Subclasses override class attributes (name, description) and whichever lifecycle
+    hook methods they need (pre_setup, post_setup, pre_agent, post_trial); all are no-ops here.
+    """
+
+    # Subclasses override these (name presumably matches the --plugins CLI value; verify in registry)
+    name: str = ""
+    description: str = ""
+
+    def __init__(self) -> None:
+        pass
+
+    def should_run(self, phase: str, context: PluginContext) -> bool:
+        """Override to conditionally execute based on context; the base implementation always agrees
+
+        Args:
+            phase: Hook phase name (pre_setup, post_setup, pre_agent, post_trial)
+            context: Plugin execution context
+
+        Returns:
+            True if plugin should run for this phase/context (always True unless overridden)
+        """
+        return True
+
+    def pre_setup(self, context: PluginContext) -> None:
+        """Hook before task's setup.sh execution (no-op by default)"""
+        pass
+
+    def post_setup(self, context: PluginContext) -> None:
+        """Hook after task's setup.sh execution (no-op by default)"""
+        pass
+
+    def pre_agent(self, context: PluginContext) -> None:
+        """Hook before agent execution (no-op by default)"""
+        pass
+
+    def post_trial(self, context: PluginContext) -> None:
+        """Hook after trial completion (no-op by default)"""
+        pass
diff --git a/ade_bench/plugins/dbt_mcp.py b/ade_bench/plugins/dbt_mcp.py
@@ -0,0 +1,54 @@
+# ABOUTME: dbt-mcp plugin registers dbt MCP server with installed agents
+# ABOUTME: Uses existing setup-dbt-mcp.sh script for Snowflake dbt projects
+
+import logging
+from pathlib import Path
+from ade_bench.plugins.base_plugin import BasePlugin, PluginContext
+
+logger = logging.getLogger(__name__)
+
+
+class DbtMcpPlugin(BasePlugin):
+    """Plugin that copies setup-dbt-mcp.sh into the container and runs it before the agent."""
+
+    name = "dbt-mcp"
+    description = "Registers dbt MCP server with installed agents"
+
+    def should_run(self, phase: str, context: PluginContext) -> bool:
+        """Only run for dbt projects on Snowflake (the phase argument is not consulted)"""
+        return context.db_type == "snowflake" and context.project_type in ("dbt", "dbt-fusion")
+
+    def pre_agent(self, context: PluginContext) -> None:
+        """Register dbt MCP server before agent starts
+
+        Raises:
+            FileNotFoundError: if setup-dbt-mcp.sh is missing from the shared scripts directory
+        """
+        import shlex  # local import: only needed to quote the shell arguments below
+
+        # NOTE(review): reads TrialHandler's private _shared_path — confirm no public accessor exists
+        script_path = context.trial_handler._shared_path / "scripts/setup-dbt-mcp.sh"
+        logger.info("[DbtMcpPlugin] Preparing to register dbt MCP server")
+        logger.info("[DbtMcpPlugin] Source script path: %s", script_path)
+        logger.info("[DbtMcpPlugin] Script exists: %s", script_path.exists())
+        if not script_path.exists():
+            logger.error("[DbtMcpPlugin] Script not found at %s", script_path)
+            raise FileNotFoundError(f"dbt-mcp setup script not found at {script_path}")
+
+        logger.info("[DbtMcpPlugin] Copying script to container at /scripts/setup-dbt-mcp.sh")
+        context.terminal.copy_to_container(
+            paths=script_path,
+            container_dir="/scripts",
+            container_filename="setup-dbt-mcp.sh",
+        )
+        logger.info("[DbtMcpPlugin] Script copied successfully")
+
+        # Agent NAME is normally an enum; fall back to str() for plain values, "unknown" if unset
+        raw_name = context.agent_name
+        agent_name = str(getattr(raw_name, "value", raw_name)) if raw_name else "unknown"
+        script_args = (str(context.db_type), str(context.project_type), agent_name)
+        command = "bash /scripts/setup-dbt-mcp.sh " + " ".join(map(shlex.quote, script_args))
+
+        logger.info("[DbtMcpPlugin] Executing dbt-mcp setup for %s", agent_name)
+        context.session.send_keys([command, "Enter"], block=True, max_timeout_sec=300)
+        logger.info("[DbtMcpPlugin] dbt-mcp setup completed")