diff --git a/plan/agent-card-rfc.md b/plan/agent-card-rfc.md
index 20d6df278..af71282a6 100644
--- a/plan/agent-card-rfc.md
+++ b/plan/agent-card-rfc.md
@@ -7,6 +7,8 @@ A loader validates fields based on `type` and loads a single file or a directory
 optional/experimental and described in a separate spec.
 AgentCards now support an optional `description` field used for tool descriptions when
 agents are exposed as tools (MCP or agent-as-tool wiring).
+AgentCards may enable local shell execution via `shell: true` with optional `cwd`.
+CLI `--card-tool` loads AgentCards and exposes them as tools on the default agent.
 CLI runs also auto-load cards from `.fast-agent/agent-cards/` (agents) and
 `.fast-agent/tool-cards/` (tool cards) when those directories exist and contain
 supported card files.
@@ -109,9 +111,10 @@ Allowed fields:
 - `agents` (agents-as-tools)
 - `servers`, `tools`, `resources`, `prompts`, `skills`
 - `model`, `use_history`, `request_params`, `human_input`, `api_key`
-- `history_source`, `history_merge_target`
+- `history_mode`
 - `max_parallel`, `child_timeout_sec`, `max_display_instances`
 - `function_tools`, `tool_hooks` (see separate spec)
+- `shell`, `cwd`
 - `messages` (card-only history file)
 
 ### type: `chain` (maps to `@fast.chain`)
@@ -209,6 +212,7 @@ scope.
 ### Fields (AgentCard)
 These fields are set on the **orchestrator** (parent) AgentCard because it
 controls child invocation and the initial context passed to child agents.
+They are **proposed** and not yet part of the validator or loader.
 
 - `history_source`: `none` | `messages` | `child` | `orchestrator` | `cumulative` *)
 - `history_merge_target`: `none` | `messages` **) | `child` | `orchestrator` | `cumulative` *)
@@ -371,8 +375,10 @@ You are a concise analyst.
 
 ### Runtime tool injection (optional)
 - `/card --tool` exposes the loaded agent as a tool on the **current** agent.
+- CLI: `fast-agent go --card-tool <path>` loads cards and exposes them as tools on the default agent.
 - Tool names default to `agent__{name}`.
 - Tool descriptions prefer `description`; fall back to the agent instruction.
+- Tool calls use a single `message` argument.
 - Default behavior is **stateless**: fresh clone per call with no history load or merge
   (`history_source=none`, `history_merge_target=none`).
 
@@ -508,10 +514,6 @@ See [plan/agent-card-rfc-sample.md](plan/agent-card-rfc-sample.md).
   set includes it. This creates a self-referential tool and can recurse if the
   model calls it. Filter out the current agent and dedupe tool names.
   (`src/fast_agent/ui/interactive_prompt.py`, `src/fast_agent/acp/slash_commands.py`)
-- `add_agent_tool` forwards to the **live child instance** (`child.send`) rather
-  than a detached clone. This diverges from Agents-as-Tools isolation semantics
-  and can leak history/usage across parallel calls.
-  (`src/fast_agent/agents/tool_agent.py`)
 - AgentCard `type` currently defaults to `agent` when missing. If strict validation
   is expected, this should be an error (otherwise unrelated frontmatter files are
   accepted silently).
@@ -521,31 +523,14 @@ See [plan/agent-card-rfc-sample.md](plan/agent-card-rfc-sample.md).
   (`src/fast_agent/ui/interactive_prompt.py`, `src/fast_agent/acp/slash_commands.py`)
 
 ## Appendix: Code Review Fix Plan
-General plan: extract the child-tool execution helpers from `AgentsAsToolsAgent`
-and reuse them in the `/card --tool` flow so injected agent tools behave the same
-as agents-as-tools (detached clones, optional history merge, usage rollup).
-
-Proposed steps:
-1) **Extract shared helpers** from `AgentsAsToolsAgent` into a small module, e.g.
-   `fast_agent/agents/agent_tool_helpers.py`:
-   - `serialize_tool_args(args) -> str`
-   - `spawn_child_clone(child, instance_name, history_source)`
-   - `invoke_child_tool(clone, args, suppress_display)`
-   - `merge_child_usage_and_history(child, clone, merge_target)`
-2) **Refactor `AgentsAsToolsAgent`** to call these helpers without changing behavior.
-   This keeps parity with current features (history modes, progress, usage merge).
-3) **Update `/card --tool` path** (TUI + ACP):
-   - Filter out the current agent from `loaded_names` to avoid self-tools.
-   - Use the shared helpers to create a tool wrapper that spawns detached clones
-     per call (not the live child instance).
-   - Deduplicate tools by name and surface a warning if a tool already exists.
-4) **Add tests**:
-   - `/card --tool` does not inject self.
-   - Injected tools use detached clones (no shared history).
-   - History merge behavior respects `history_source` and `history_merge_target`.
-5) **ACP coverage**:
-   - Ensure `/card` updates available commands and keeps session modes consistent.
-   - Validate tool injection works in ACP and TUI with identical behavior.
+Current status:
+- Tool injection uses detached clones (ToolAgent parity).
+- `/card --tool` and `--card-tool` share tool-registration wiring.
+
+Remaining work:
+1) Filter out the current agent from `/card --tool` to avoid self-tools.
+2) Deduplicate tool names on injection and surface a warning if a tool already exists.
+3) Align `/card --tool` with Agents-as-Tools helpers if advanced history merge modes are implemented.
 
 ## Appendix: Next-stage Work Items
 - **Cumulative session history**: no shared, merged transcript exists today; requires
diff --git a/src/fast_agent/cli/commands/go.py b/src/fast_agent/cli/commands/go.py
index 863e2a2cd..1fc60824d 100644
--- a/src/fast_agent/cli/commands/go.py
+++ b/src/fast_agent/cli/commands/go.py
@@ -156,6 +156,11 @@ async def _run_agent(
     watch: bool = False,
 ) -> None:
     """Async implementation to run an interactive agent."""
+    if mode == "serve" and transport in ["stdio", "acp"]:
+        from fast_agent.ui.console import configure_console_stream
+
+        configure_console_stream("stderr")
+
     from fast_agent import FastAgent
     from fast_agent.agents.llm_agent import LlmAgent
     from fast_agent.mcp.prompts.prompt_load import load_prompt
diff --git a/src/fast_agent/core/fastagent.py b/src/fast_agent/core/fastagent.py
index 869054559..b8d5c6085 100644
--- a/src/fast_agent/core/fastagent.py
+++ b/src/fast_agent/core/fastagent.py
@@ -1131,6 +1131,7 @@ async def load_card_source(source: str) -> list[str]:
                                     server_name=server_name or f"{self.name}-MCP-Server",
                                     server_description=server_description,
                                     tool_description=tool_description,
+                                    host=self.args.host,
                                     get_registry_version=self._get_registry_version,
                                 )
 
diff --git a/src/fast_agent/llm/provider_key_manager.py b/src/fast_agent/llm/provider_key_manager.py
index 1c2768d2f..336d88d8e 100644
--- a/src/fast_agent/llm/provider_key_manager.py
+++ b/src/fast_agent/llm/provider_key_manager.py
@@ -91,11 +91,22 @@ def get_api_key(provider_name: str, config: Any) -> str:
         if provider_name == "fast-agent":
             return ""
 
+        # Check for request-scoped token first (token passthrough from MCP server)
+        # This allows clients to pass their own HF token via Authorization header
+        if provider_name in {"hf", "huggingface"}:
+            from fast_agent.mcp.auth.context import request_bearer_token
+
+            ctx_token = request_bearer_token.get()
+            if ctx_token:
+                return ctx_token
+
         # Google Vertex AI uses ADC/IAM and does not require an API key.
         if provider_name == "google":
             try:
                 cfg = config.model_dump() if isinstance(config, BaseModel) else config
-                if isinstance(cfg, dict) and bool((cfg.get("google") or {}).get("vertex_ai", {}).get("enabled")):
+                if isinstance(cfg, dict) and bool(
+                    (cfg.get("google") or {}).get("vertex_ai", {}).get("enabled")
+                ):
                     return ""
             except Exception:
                 pass
diff --git a/src/fast_agent/mcp/auth/__init__.py b/src/fast_agent/mcp/auth/__init__.py
new file mode 100644
index 000000000..e3f61e168
--- /dev/null
+++ b/src/fast_agent/mcp/auth/__init__.py
@@ -0,0 +1,7 @@
+"""Authentication modules for MCP server."""
+
+from fast_agent.mcp.auth.context import request_bearer_token
+from fast_agent.mcp.auth.middleware import HFAuthHeaderMiddleware
+from fast_agent.mcp.auth.presence import PresenceTokenVerifier
+
+__all__ = ["HFAuthHeaderMiddleware", "PresenceTokenVerifier", "request_bearer_token"]
diff --git a/src/fast_agent/mcp/auth/context.py b/src/fast_agent/mcp/auth/context.py
new file mode 100644
index 000000000..b9aa8a99e
--- /dev/null
+++ b/src/fast_agent/mcp/auth/context.py
@@ -0,0 +1,8 @@
+"""Context variables for request-scoped authentication."""
+
+from contextvars import ContextVar
+
+# Stores the bearer token for the current request.
+# Used to pass through to LLM providers (e.g., HuggingFace).
+# Tasks copy the context at creation: changes stay task-local, but child tasks inherit the value.
+request_bearer_token: ContextVar[str | None] = ContextVar("request_bearer_token", default=None)
diff --git a/src/fast_agent/mcp/auth/middleware.py b/src/fast_agent/mcp/auth/middleware.py
new file mode 100644
index 000000000..3715d2717
--- /dev/null
+++ b/src/fast_agent/mcp/auth/middleware.py
@@ -0,0 +1,37 @@
+"""Middleware for handling HuggingFace-specific authentication headers."""
+
+from starlette.types import ASGIApp, Receive, Scope, Send
+
+
+class HFAuthHeaderMiddleware:
+    """
+    ASGI middleware that mirrors X-HF-Authorization into Authorization.
+
+    HuggingFace Spaces send credentials in X-HF-Authorization, whereas
+    FastMCP's BearerAuthBackend reads only the standard Authorization header.
+    A request that already carries Authorization is forwarded unchanged.
+    """
+
+    def __init__(self, app: ASGIApp):
+        # Downstream ASGI application that receives every request
+        self.app = app
+
+    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+        if scope["type"] == "http":
+            raw_headers = scope.get("headers", [])
+            already_authorized = any(
+                name.lower() == b"authorization" for name, _ in raw_headers
+            )
+            if not already_authorized:
+                # Use the first X-HF-Authorization value, if the client sent one
+                hf_value = next(
+                    (val for name, val in raw_headers if name.lower() == b"x-hf-authorization"),
+                    None,
+                )
+                if hf_value is not None:
+                    # Build a new scope so the caller's mapping is not mutated
+                    extended = list(raw_headers) + [(b"authorization", hf_value)]
+                    scope = dict(scope, headers=extended)
+
+        # Non-HTTP scopes (lifespan, websocket) fall through untouched
+        await self.app(scope, receive, send)
diff --git a/src/fast_agent/mcp/auth/presence.py b/src/fast_agent/mcp/auth/presence.py
new file mode 100644
index 000000000..7eba87d90
--- /dev/null
+++ b/src/fast_agent/mcp/auth/presence.py
@@ -0,0 +1,42 @@
+"""Presence-only token verifier for MCP server authentication."""
+
+from mcp.server.auth.provider import AccessToken, TokenVerifier
+
+
+class PresenceTokenVerifier(TokenVerifier):
+    """
+    Token verifier that accepts any non-blank bearer token.
+
+    No external service is consulted here; real validation is left to the
+    downstream service (e.g., HuggingFace inference API) that receives the token.
+    """
+
+    def __init__(self, provider: str = "generic", scopes: list[str] | None = None):
+        """
+        Create a verifier for the given provider.
+
+        Args:
+            provider: Name of the OAuth provider (for logging/debugging).
+            scopes: Scopes granted to accepted tokens. Falls back to ["access"]
+                when omitted or empty.
+        """
+        self.provider = provider
+        self.scopes = scopes if scopes else ["access"]
+
+    async def verify_token(self, token: str) -> AccessToken | None:
+        """
+        Accept the token when it contains at least one non-whitespace character.
+
+        Args:
+            token: The bearer token to verify.
+
+        Returns:
+            AccessToken wrapping the token unchanged, or None when it is blank.
+        """
+        has_content = bool(token) and bool(token.strip())
+        if has_content:
+            # client_id is a fixed placeholder value
+            granted = AccessToken(token=token, client_id="bearer-client", scopes=self.scopes)
+            return granted
+
+        return None
diff --git a/src/fast_agent/mcp/server/agent_server.py b/src/fast_agent/mcp/server/agent_server.py
index 19f5d61d8..eb1b38be3 100644
--- a/src/fast_agent/mcp/server/agent_server.py
+++ b/src/fast_agent/mcp/server/agent_server.py
@@ -8,6 +8,7 @@
 import signal
 import time
 from contextlib import AsyncExitStack, asynccontextmanager
+from importlib.metadata import version as get_version
 from typing import Any, AsyncContextManager, Awaitable, Callable, Literal, Protocol, cast
 
 from mcp.server.fastmcp import Context as MCPContext
@@ -22,6 +23,32 @@
 logger = get_logger(__name__)
 
 
+def _get_oauth_config() -> tuple[str | None, list[str], str]:
+    """
+    Read OAuth configuration from environment variables.
+
+    Values are whitespace-trimmed; blank values are treated as unset.
+
+    Returns:
+        Tuple of (provider, scopes, resource_url).
+        provider is None if OAuth is not enabled; unknown names pass through.
+    """
+    raw_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").strip().lower()
+
+    # Normalize provider aliases; an empty value means OAuth is disabled
+    oauth_provider: str | None = raw_provider or None
+    if raw_provider in ("hf", "huggingface"):
+        oauth_provider = "huggingface"
+
+    # Parse scopes from comma-separated string, defaulting to ["access"]
+    oauth_scopes_str = os.environ.get("FAST_AGENT_OAUTH_SCOPES", "")
+    oauth_scopes = [s.strip() for s in oauth_scopes_str.split(",") if s.strip()] or ["access"]
+
+    # Resource URL defaults to localhost:8000 when unset or blank
+    resource_url = os.environ.get("FAST_AGENT_OAUTH_RESOURCE_URL", "").strip()
+    return oauth_provider, oauth_scopes, resource_url or "http://localhost:8000"
+
+
 TransportMode = Literal["http", "sse", "stdio"]
 McpTransportMode = Literal["streamable-http", "sse", "stdio"]
 
@@ -50,6 +77,7 @@ def __init__(
         server_name: str = "FastAgent-MCP-Server",
         server_description: str | None = None,
         tool_description: str | None = None,
+        host: str = "0.0.0.0",
         get_registry_version: Callable[[], int] | None = None,
     ) -> None:
         """Initialize the server with the provided agent app."""
@@ -62,11 +90,51 @@ def __init__(
         self._shared_instance_lock = asyncio.Lock()
         self._shared_active_requests = 0
         self._stale_instances: list[AgentInstance] = []
+
+        # Check for OAuth configuration
+        oauth_provider, oauth_scopes, resource_url = _get_oauth_config()
+        token_verifier = None
+        auth_settings = None
+
+        if oauth_provider == "huggingface":
+            from mcp.server.auth.settings import AuthSettings
+            from pydantic import AnyHttpUrl
+
+            from fast_agent.mcp.auth.presence import PresenceTokenVerifier
+
+            token_verifier = PresenceTokenVerifier(provider="huggingface", scopes=oauth_scopes)
+            auth_settings = AuthSettings(
+                issuer_url=AnyHttpUrl("https://huggingface.co"),
+                resource_server_url=AnyHttpUrl(resource_url),
+                required_scopes=oauth_scopes,
+            )
+            logger.info(
+                f"OAuth enabled for provider '{oauth_provider}'",
+                name="oauth_enabled",
+                provider=oauth_provider,
+                scopes=oauth_scopes,
+                resource_url=resource_url,
+            )
+
         self.mcp_server: FastMCP = FastMCP(
             name=server_name,
             instructions=server_description
             or f"This server provides access to {len(primary_instance.agents)} agents",
+            token_verifier=token_verifier,
+            auth=auth_settings,
+            host=host,
         )
+
+        # Register root route for HTTP/SSE transport info
+        @self.mcp_server.custom_route("/", methods=["GET"])
+        async def root_info(request):
+            """Plain-text banner for GET /, naming the installed fast-agent-mcp version."""
+            from starlette.responses import PlainTextResponse
+            version = get_version("fast-agent-mcp")
+            return PlainTextResponse(
+                f"fast-agent mcp server (v{version}) - see https://fast-agent.ai for more information."
+            )
+
         if self._instance_scope == "request":
             # Ensure FastMCP does not attempt to maintain sessions for stateless mode
             self.mcp_server.settings.stateless_http = True
@@ -135,50 +203,70 @@ def register_agent_tools(self, agent_name: str) -> None:
         )
         async def send_message(message: str, ctx: MCPContext) -> str:
             """Send a message to the agent and return its response."""
-            instance = await self._acquire_instance(ctx)
-            agent = instance.app[agent_name]
-            agent_context = getattr(agent, "context", None)
-
-            # Define the function to execute
-            async def execute_send():
-                start = time.perf_counter()
-                logger.info(
-                    f"MCP request received for agent '{agent_name}'",
-                    name="mcp_request_start",
-                    agent=agent_name,
-                    session=self._session_identifier(ctx),
-                )
-                self.std_logger.info(
-                    "MCP request received for agent '%s' (scope=%s)",
-                    agent_name,
-                    self._instance_scope,
-                )
+            # Extract bearer token from auth context for token passthrough
+            from fast_agent.mcp.auth.context import request_bearer_token
 
-                response = await agent.send(message)
-                duration = time.perf_counter() - start
+            bearer_token = None
+            try:
+                from mcp.server.auth.middleware.auth_context import get_access_token
 
-                logger.info(
-                    f"Agent '{agent_name}' completed MCP request",
-                    name="mcp_request_complete",
-                    agent=agent_name,
-                    duration=duration,
-                    session=self._session_identifier(ctx),
-                )
-                self.std_logger.info(
-                    "Agent '%s' completed MCP request in %.2fs (scope=%s)",
-                    agent_name,
-                    duration,
-                    self._instance_scope,
-                )
-                return response
+                access_token = get_access_token()
+                if access_token:
+                    bearer_token = access_token.token
+            except Exception:
+                # Auth context not available (e.g., no auth configured)
+                pass
 
+            # Set the token in our contextvar for LLM provider access
+            saved_token = request_bearer_token.set(bearer_token)
             try:
-                # Execute with bridged context
-                if agent_context and ctx:
-                    return await self.with_bridged_context(agent_context, ctx, execute_send)
-                return await execute_send()
+                instance = await self._acquire_instance(ctx)
+                agent = instance.app[agent_name]
+                agent_context = getattr(agent, "context", None)
+
+                # Define the function to execute
+                async def execute_send():
+                    start = time.perf_counter()
+                    logger.info(
+                        f"MCP request received for agent '{agent_name}'",
+                        name="mcp_request_start",
+                        agent=agent_name,
+                        session=self._session_identifier(ctx),
+                    )
+                    self.std_logger.info(
+                        "MCP request received for agent '%s' (scope=%s)",
+                        agent_name,
+                        self._instance_scope,
+                    )
+
+                    response = await agent.send(message)
+                    duration = time.perf_counter() - start
+
+                    logger.info(
+                        f"Agent '{agent_name}' completed MCP request",
+                        name="mcp_request_complete",
+                        agent=agent_name,
+                        duration=duration,
+                        session=self._session_identifier(ctx),
+                    )
+                    self.std_logger.info(
+                        "Agent '%s' completed MCP request in %.2fs (scope=%s)",
+                        agent_name,
+                        duration,
+                        self._instance_scope,
+                    )
+                    return response
+
+                try:
+                    # Execute with bridged context
+                    if agent_context and ctx:
+                        return await self.with_bridged_context(agent_context, ctx, execute_send)
+                    return await execute_send()
+                finally:
+                    await self._release_instance(ctx, instance)
             finally:
-                await self._release_instance(ctx, instance)
+                # Always reset the contextvar
+                request_bearer_token.reset(saved_token)
 
         # Register a history prompt for this agent
         @self.mcp_server.prompt(
@@ -276,7 +364,9 @@ async def _maybe_refresh_shared_instance(self) -> None:
             new_instance = await self._create_instance_task()
             old_instance = self.primary_instance
             self.primary_instance = new_instance
-            self._primary_registry_version = getattr(new_instance, "registry_version", latest_version)
+            self._primary_registry_version = getattr(
+                new_instance, "registry_version", latest_version
+            )
             self._stale_instances.append(old_instance)
 
             new_agents = set(new_instance.agents.keys())
@@ -429,9 +519,7 @@ async def run_async(
                 http_transport = "sse"
             else:
                 raise ValueError("HTTP/SSE handler received stdio transport")
-            self._server_task = asyncio.create_task(
-                self._run_server_with_shutdown(http_transport)
-            )
+            self._server_task = asyncio.create_task(self._run_server_with_shutdown(http_transport))
 
             try:
                 # Wait for the server task to complete
@@ -507,7 +595,12 @@ async def tracked_connect_sse(*args, **kwargs):
             if transport == "sse":
                 await self.mcp_server.run_sse_async()
             elif transport == "http":
-                await self.mcp_server.run_streamable_http_async()
+                # Check if HF OAuth is enabled - if so, wrap app with header middleware
+                oauth_provider = os.environ.get("FAST_AGENT_SERVE_OAUTH", "").lower()
+                if oauth_provider in ("hf", "huggingface"):
+                    await self._run_http_with_hf_middleware()
+                else:
+                    await self.mcp_server.run_streamable_http_async()
         finally:
             # Cancel the monitor when the server exits
             shutdown_monitor.cancel()
@@ -516,6 +609,32 @@ async def tracked_connect_sse(*args, **kwargs):
             except asyncio.CancelledError:
                 pass
 
+    async def _run_http_with_hf_middleware(self) -> None:
+        """Serve streamable HTTP behind the HuggingFace header middleware.
+
+        The FastMCP Starlette app is wrapped in HFAuthHeaderMiddleware and run
+        with uvicorn using the host, port and log level from the FastMCP settings.
+        """
+        import uvicorn
+
+        from fast_agent.mcp.auth.middleware import HFAuthHeaderMiddleware
+
+        settings = self.mcp_server.settings
+
+        # Outermost layer: normalize headers before the FastMCP app sees the request
+        asgi_app = HFAuthHeaderMiddleware(self.mcp_server.streamable_http_app())
+
+        # The configured log level is lowercased before being handed to uvicorn
+        uvicorn_config = uvicorn.Config(
+            asgi_app,
+            host=settings.host,
+            port=settings.port,
+            log_level=settings.log_level.lower(),
+        )
+
+        # Runs until the uvicorn server exits
+        await uvicorn.Server(uvicorn_config).serve()
+
     async def _monitor_shutdown(self):
         """Monitor for shutdown signals and coordinate proper shutdown sequence."""
         try:
@@ -573,7 +692,6 @@ async def _close_sse_connections(self):
         mcp_ext = cast("_FastMCPLocalExtensions", self.mcp_server)
         sse = getattr(mcp_ext, "_sse_transport", None)
         if sse is not None:
-
             # Close all read stream writers
             writers = list(sse._read_stream_writers.items())
             for session_id, writer in writers:
@@ -696,7 +814,6 @@ async def _cleanup_minimal(self):
         mcp_ext = cast("_FastMCPLocalExtensions", self.mcp_server)
         sse = getattr(mcp_ext, "_sse_transport", None)
         if sse is not None:
-
             # Close all read stream writers
             for session_id, writer in list(sse._read_stream_writers.items()):
                 try: