openai
diff --git a/‎src/agents/_run_impl.py
+4-3 b/‎src/agents/_run_impl.py
+4-3
diff --git a/‎src/agents/agent.py
+20 b/‎src/agents/agent.py
+20
diff --git a/‎src/agents/run.py
+20-4 b/‎src/agents/run.py
+20-4
diff --git a/‎tests/mcp/__init__.py b/‎tests/mcp/__init__.py
diff --git a/‎tests/mcp/conftest.py
+11 b/‎tests/mcp/conftest.py
+11
diff --git a/‎tests/mcp/helpers.py
+54 b/‎tests/mcp/helpers.py
+54
diff --git a/‎tests/mcp/test_caching.py
+57 b/‎tests/mcp/test_caching.py
+57
diff --git a/‎tests/mcp/test_connect_disconnect.py
+69 b/‎tests/mcp/test_connect_disconnect.py
+69
@@ -50,7 +50,7 @@
 from .models.interface import ModelTracing
 from .run_context import RunContextWrapper, TContext
 from .stream_events import RunItemStreamEvent, StreamEvent
-from .tool import ComputerTool, FunctionTool, FunctionToolResult
+from .tool import ComputerTool, FunctionTool, FunctionToolResult, Tool
 from .tracing import (
     SpanError,
     Trace,
@@ -301,6 +301,7 @@ def process_model_response(
         cls,
         *,
         agent: Agent[Any],
+        all_tools: list[Tool],
         response: ModelResponse,
         output_schema: AgentOutputSchema | None,
         handoffs: list[Handoff],
@@ -312,8 +313,8 @@ def process_model_response(
         computer_actions = []
 
         handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
-        function_map = {tool.name: tool for tool in agent.tools if isinstance(tool, FunctionTool)}
-        computer_tool = next((tool for tool in agent.tools if isinstance(tool, ComputerTool)), None)
+        function_map = {tool.name: tool for tool in all_tools if isinstance(tool, FunctionTool)}
+        computer_tool = next((tool for tool in all_tools if isinstance(tool, ComputerTool)), None)
 
         for output in response.output:
             if isinstance(output, ResponseOutputMessage):
 
@@ -12,6 +12,7 @@
 from .handoffs import Handoff
 from .items import ItemHelpers
 from .logger import logger
+from .mcp import MCPUtil
 from .model_settings import ModelSettings
 from .models.interface import Model
 from .run_context import RunContextWrapper, TContext
@@ -21,6 +22,7 @@
 
 if TYPE_CHECKING:
     from .lifecycle import AgentHooks
+    from .mcp import MCPServer
     from .result import RunResult
 
 
@@ -107,6 +109,16 @@ class Agent(Generic[TContext]):
     tools: list[Tool] = field(default_factory=list)
     """A list of tools that the agent can use."""
 
+    mcp_servers: list[MCPServer] = field(default_factory=list)
+    """A list of [Model Context Protocol](https://modelcontextprotocol.io/) servers that
+    the agent can use. Every time the agent runs, it will include tools from these servers in the
+    list of available tools.
+
+    NOTE: You are expected to manage the lifecycle of these servers. Specifically, you must call
+    `server.connect()` before passing it to the agent, and `server.cleanup()` when the server is no
+    longer needed.
+    """
+
     input_guardrails: list[InputGuardrail[TContext]] = field(default_factory=list)
     """A list of checks that run in parallel to the agent's execution, before generating a
     response. Runs only if the agent is the first agent in the chain.
@@ -205,3 +217,11 @@ async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> s
             logger.error(f"Instructions must be a string or a function, got {self.instructions}")
 
         return None
+
+    async def get_mcp_tools(self) -> list[Tool]:
+        """Fetches the function tools made available by the servers in `mcp_servers`."""
+        return await MCPUtil.get_all_function_tools(self.mcp_servers)
+
+    async def get_all_tools(self) -> list[Tool]:
+        """All agent tools: the MCP tools from `get_mcp_tools()` first, then `self.tools`."""
+        return (await self.get_mcp_tools()) + self.tools
@@ -7,6 +7,8 @@
 
 from openai.types.responses import ResponseCompletedEvent
 
+from agents.tool import Tool
+
 from ._run_impl import (
     NextStepFinalOutput,
     NextStepHandoff,
@@ -177,7 +179,8 @@ async def run(
                     # agent changes, or if the agent loop ends.
                     if current_span is None:
                         handoff_names = [h.agent_name for h in cls._get_handoffs(current_agent)]
-                        tool_names = [t.name for t in current_agent.tools]
+                        all_tools = await cls._get_all_tools(current_agent)
+                        tool_names = [t.name for t in all_tools]
                         if output_schema := cls._get_output_schema(current_agent):
                             output_type_name = output_schema.output_type_name()
                         else:
@@ -217,6 +220,7 @@ async def run(
                             ),
                             cls._run_single_turn(
                                 agent=current_agent,
+                                all_tools=all_tools,
                                 original_input=original_input,
                                 generated_items=generated_items,
                                 hooks=hooks,
@@ -228,6 +232,7 @@ async def run(
                     else:
                         turn_result = await cls._run_single_turn(
                             agent=current_agent,
+                            all_tools=all_tools,
                             original_input=original_input,
                             generated_items=generated_items,
                             hooks=hooks,
@@ -627,7 +632,7 @@ async def _run_single_turn_streamed(
         system_prompt = await agent.get_system_prompt(context_wrapper)
 
         handoffs = cls._get_handoffs(agent)
-
+        all_tools = await cls._get_all_tools(agent)
         model = cls._get_model(agent, run_config)
         model_settings = agent.model_settings.resolve(run_config.model_settings)
         final_response: ModelResponse | None = None
@@ -640,7 +645,7 @@ async def _run_single_turn_streamed(
             system_prompt,
             input,
             model_settings,
-            agent.tools,
+            all_tools,
             output_schema,
             handoffs,
             get_model_tracing_impl(
@@ -677,6 +682,7 @@ async def _run_single_turn_streamed(
             pre_step_items=streamed_result.new_items,
             new_response=final_response,
             output_schema=output_schema,
+            all_tools=all_tools,
             handoffs=handoffs,
             hooks=hooks,
             context_wrapper=context_wrapper,
@@ -691,6 +697,7 @@ async def _run_single_turn(
         cls,
         *,
         agent: Agent[TContext],
+        all_tools: list[Tool],
         original_input: str | list[TResponseInputItem],
         generated_items: list[RunItem],
         hooks: RunHooks[TContext],
@@ -721,6 +728,7 @@ async def _run_single_turn(
             system_prompt,
             input,
             output_schema,
+            all_tools,
             handoffs,
             context_wrapper,
             run_config,
@@ -732,6 +740,7 @@ async def _run_single_turn(
             pre_step_items=generated_items,
             new_response=new_response,
             output_schema=output_schema,
+            all_tools=all_tools,
             handoffs=handoffs,
             hooks=hooks,
             context_wrapper=context_wrapper,
@@ -743,6 +752,7 @@ async def _get_single_step_result_from_response(
         cls,
         *,
         agent: Agent[TContext],
+        all_tools: list[Tool],
         original_input: str | list[TResponseInputItem],
         pre_step_items: list[RunItem],
         new_response: ModelResponse,
@@ -754,6 +764,7 @@ async def _get_single_step_result_from_response(
     ) -> SingleStepResult:
         processed_response = RunImpl.process_model_response(
             agent=agent,
+            all_tools=all_tools,
             response=new_response,
             output_schema=output_schema,
             handoffs=handoffs,
@@ -853,6 +864,7 @@ async def _get_new_response(
         system_prompt: str | None,
         input: list[TResponseInputItem],
         output_schema: AgentOutputSchema | None,
+        all_tools: list[Tool],
         handoffs: list[Handoff],
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
@@ -863,7 +875,7 @@ async def _get_new_response(
             system_instructions=system_prompt,
             input=input,
             model_settings=model_settings,
-            tools=agent.tools,
+            tools=all_tools,
             output_schema=output_schema,
             handoffs=handoffs,
             tracing=get_model_tracing_impl(
@@ -892,6 +904,10 @@ def _get_handoffs(cls, agent: Agent[Any]) -> list[Handoff]:
                 handoffs.append(handoff(handoff_item))
         return handoffs
 
+    @classmethod
+    async def _get_all_tools(cls, agent: Agent[Any]) -> list[Tool]:
+        return await agent.get_all_tools()  # MCP-server tools plus the agent's own `tools`
+
     @classmethod
     def _get_model(cls, agent: Agent[Any], run_config: RunConfig) -> Model:
         if isinstance(run_config.model, Model):
 
@@ -0,0 +1,11 @@
+import os
+import sys
+
+
+def pytest_ignore_collect(collection_path, config):
+    """Skip MCP tests on Python 3.9 by ignoring paths that start with this directory."""
+    if sys.version_info[:2] != (3, 9):
+        return None
+
+    mcp_tests_dir = os.path.dirname(__file__)
+    return True if str(collection_path).startswith(mcp_tests_dir) else None
@@ -0,0 +1,54 @@
+import json
+import shutil
+from typing import Any
+
+from mcp import Tool as MCPTool
+from mcp.types import CallToolResult, TextContent
+
+from agents.mcp import MCPServer
+
+tee = shutil.which("tee") or ""  # path to `tee`; tests pass it as the MCPServerStdio "command"
+assert tee, "tee not found"  # fail at import time when `tee` is not on PATH
+
+
+# Dummy stream classes used to patch stdio_client so the tests perform no real I/O
+class DummyStream:
+    async def send(self, msg):
+        """Accept `msg` and discard it; nothing is actually sent."""
+
+    async def receive(self):
+        raise Exception("Dummy receive not implemented")
+
+
+class DummyStreamsContextManager:
+    async def __aenter__(self):
+        return DummyStream(), DummyStream()
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Nothing to release; returns None, so exceptions are not suppressed."""
+
+
+# In-memory MCPServer for tests: keeps its tool list locally and records every call_tool().
+class FakeMCPServer(MCPServer):
+    def __init__(self, tools: list[MCPTool] | None = None):
+        self.tools: list[MCPTool] = tools if tools else []
+        self.tool_calls: list[str] = []
+        self.tool_results: list[str] = []
+
+    def add_tool(self, name: str, input_schema: dict[str, Any]):
+        self.tools.append(MCPTool(inputSchema=input_schema, name=name))
+
+    async def connect(self):
+        """No-op: there is no real connection to open."""
+
+    async def cleanup(self):
+        """No-op: there is nothing to tear down."""
+
+    async def list_tools(self):
+        return self.tools
+
+    async def call_tool(self, tool_name: str, arguments: dict[str, Any] | None) -> CallToolResult:
+        self.tool_calls.append(tool_name)
+        result_text = f"result_{tool_name}_{json.dumps(arguments)}"
+        self.tool_results.append(result_text)
+        return CallToolResult(content=[TextContent(type="text", text=result_text)])
@@ -0,0 +1,57 @@
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from mcp.types import ListToolsResult, Tool as MCPTool
+
+from agents.mcp import MCPServerStdio
+
+from .helpers import DummyStreamsContextManager, tee
+
+
+@pytest.mark.asyncio
+@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
+@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
+@patch("mcp.client.session.ClientSession.list_tools")
+async def test_server_caching_works(
+    mock_list_tools: AsyncMock, mock_initialize: AsyncMock, mock_stdio_client
+):
+    """Test that if we turn caching on, the list of tools is cached and not fetched from the server
+    on each call to `list_tools()`.
+    """
+    server = MCPServerStdio(
+        params={
+            "command": tee,
+        },
+        cache_tools_list=True,
+    )
+
+    tools = [
+        MCPTool(name="tool1", inputSchema={}),
+        MCPTool(name="tool2", inputSchema={}),
+    ]
+
+    mock_list_tools.return_value = ListToolsResult(tools=tools)
+
+    async with server:
+        # First call: the list has to be fetched from the (mocked) session
+        result_tools = await server.list_tools()
+        assert result_tools == tools
+
+        assert mock_list_tools.call_count == 1, "list_tools() should have been called once"
+
+        # Call list_tools() again, should return the cached value
+        result_tools = await server.list_tools()
+        assert result_tools == tools
+
+        assert mock_list_tools.call_count == 1, "list_tools() should not have been called again"
+
+        # Invalidate the cache and call list_tools() again
+        server.invalidate_tools_cache()
+        result_tools = await server.list_tools()
+        assert result_tools == tools
+
+        assert mock_list_tools.call_count == 2, "list_tools() should be called again"
+
+        # Without invalidating the cache, calling list_tools() again should return the cached value
+        assert await server.list_tools() == tools
+        assert mock_list_tools.call_count == 2, "list_tools() should not have been called again"
@@ -0,0 +1,69 @@
+from unittest.mock import AsyncMock, patch
+
+import pytest
+from mcp.types import ListToolsResult, Tool as MCPTool
+
+from agents.mcp import MCPServerStdio
+
+from .helpers import DummyStreamsContextManager, tee
+
+
+@pytest.mark.asyncio
+@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
+@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
+@patch("mcp.client.session.ClientSession.list_tools")
+async def test_async_ctx_manager_works(
+    mock_list_tools: AsyncMock, mock_initialize: AsyncMock, mock_stdio_client
+):
+    """Test that `async with server` connects on entry and disconnects on exit."""
+    server = MCPServerStdio(params={"command": tee}, cache_tools_list=True)
+
+    # Canned response for the patched ClientSession.list_tools
+    mock_list_tools.return_value = ListToolsResult(
+        tools=[
+            MCPTool(name="tool1", inputSchema={}),
+            MCPTool(name="tool2", inputSchema={}),
+        ]
+    )
+
+    # Before entering the context there is no session
+    assert server.session is None, "Server should not be connected"
+
+    async with server:
+        # Inside the context a session must exist
+        assert server.session is not None, "Server should be connected"
+
+    # Leaving the context must drop the session again
+    assert server.session is None, "Server should be disconnected"
+
+
+@pytest.mark.asyncio
+@patch("mcp.client.stdio.stdio_client", return_value=DummyStreamsContextManager())
+@patch("mcp.client.session.ClientSession.initialize", new_callable=AsyncMock, return_value=None)
+@patch("mcp.client.session.ClientSession.list_tools")
+async def test_manual_connect_disconnect_works(
+    mock_list_tools: AsyncMock, mock_initialize: AsyncMock, mock_stdio_client
+):
+    """Test that calling `connect()` and `cleanup()` manually opens and closes the session."""
+    server = MCPServerStdio(
+        params={
+            "command": tee,
+        },
+        cache_tools_list=True,
+    )
+
+    mock_list_tools.return_value = ListToolsResult(
+        tools=[MCPTool(name="tool1", inputSchema={}), MCPTool(name="tool2", inputSchema={})]
+    )
+
+    assert server.session is None, "Server should not be connected"
+
+    await server.connect()
+    try:
+        assert server.session is not None, "Server should be connected"
+    finally:
+        # Always tear the server down, even when the assertion above fails
+        await server.cleanup()
+
+    assert server.session is None, "Server should be disconnected"