8 changes: 8 additions & 0 deletions examples/rag/fastagent.secrets.yaml
@@ -0,0 +1,8 @@
# Vertex RAG Supported Regions:
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-overview#supported-regions

google:
  vertex_ai:
    enabled: true
    project_id: strato-space-ai # Your project
    location: europe-west4 # Vertex RAG Supported Regions
17 changes: 17 additions & 0 deletions examples/rag/pyproject.toml
@@ -0,0 +1,17 @@
[project]
name = "fast-agent-rag"
version = "0.1.0"
description = "fast-agent Vertex RAG example"
readme = "README.md"
requires-python = ">=3.13.5"
dependencies = [
"fast-agent-mcp",
"google-cloud-aiplatform",
"google-api-python-client",
]

[tool.uv]
package = true

[tool.uv.sources]
fast-agent-mcp = { path = "../..", editable = true }
167 changes: 167 additions & 0 deletions examples/rag/vertex-rag.py
@@ -0,0 +1,167 @@
import asyncio

import google.auth
import vertexai
from googleapiclient.discovery import build
from vertexai import rag

from fast_agent import FastAgent
from fast_agent.config import get_settings

# RAG quickstart: Required roles, Prepare your Google Cloud console, Run Vertex AI RAG Engine
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-quickstart
#
# Vertex AI RAG Engine overview: Overview, Supported regions, ...
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-overview
#
# Install the Vertex AI SDK for Python
# https://docs.cloud.google.com/vertex-ai/docs/start/install-sdk
#
# Admin console
# https://console.cloud.google.com/vertex-ai/rag
# Create a RAG Corpus, Import Files, and Generate a response
# uv pip install google-api-python-client

# TODO(developer): Update PROJECT_ID and LOCATION in fastagent.secrets.yaml
CONFIG_PATH = "fastagent.secrets.yaml"

# google:
#   vertex_ai:
#     enabled: true
#     project_id: strato-space-ai # Your project
#     location: europe-west4 # Netherlands, use Vertex RAG supported regions

_settings = get_settings(CONFIG_PATH)
_vertex_ai = getattr(_settings.google, "vertex_ai", {}) if _settings.google else {}
PROJECT_ID = _vertex_ai.get("project_id")
LOCATION = _vertex_ai.get("location")

# Configure embedding model, for example "text-embedding-005".
EMBEDDING_MODEL = "text-embedding-005"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

SAMPLE_DRIVE = "1J3ubtdkmFuWDjfW3_qT2Fhsdn2pbtv-8"

if not PROJECT_ID or not LOCATION:
    raise ValueError(
        "Missing google.vertex_ai.project_id/location in fastagent.secrets.yaml"
    )


def _drive_folder_name(folder_id: str) -> str:
    credentials, _ = google.auth.default(scopes=SCOPES)
    drive_service = build("drive", "v3", credentials=credentials)
    payload = (
        drive_service.files()
        .get(
            fileId=folder_id,
            fields="id,name,mimeType",
            supportsAllDrives=True,
        )
        .execute()
    )
    return payload["name"]


# Initialize Vertex AI API once per session
# us-central1/us-east4 require allowlist; default to a GA region.

_vertex_initialized = False


def _ensure_vertexai_init() -> None:
    global _vertex_initialized
    if not _vertex_initialized:
        vertexai.init(project=PROJECT_ID, location=LOCATION)
        _vertex_initialized = True


def _create_and_import_corpus(
    display_name: str,
    paths: list[str],
) -> rag.RagCorpus:
    embedding_model_config = rag.RagEmbeddingModelConfig(
        vertex_prediction_endpoint=rag.VertexPredictionEndpoint(
            publisher_model=f"publishers/google/models/{EMBEDDING_MODEL}"
        )
    )
    rag_corpus = rag.create_corpus(
        display_name=display_name,
        backend_config=rag.RagVectorDbConfig(
            rag_embedding_model_config=embedding_model_config
        ),
    )
    rag.import_files(
        rag_corpus.name,
        paths,
        # Optional
        transformation_config=rag.TransformationConfig(
            chunking_config=rag.ChunkingConfig(
                chunk_size=512,
                chunk_overlap=100,
            ),
        ),
        max_embedding_requests_per_min=1000,  # Optional
    )
    return rag_corpus


def mini_rag(query: str, drive_id: str, top_k: int) -> object:
    _ensure_vertexai_init()
    if not drive_id:
        raise ValueError("drive_id must be a non-empty Google Drive ID.")

    paths = [f"https://drive.google.com/drive/folders/{drive_id}"]
    folder_name = _drive_folder_name(drive_id)
    key = drive_id
    display_name = f"{folder_name} | {key}"

    existing_corpus = None
    for corpus in rag.list_corpora():
        if corpus.display_name and key in corpus.display_name:
            existing_corpus = corpus
            break
    if existing_corpus:
        rag_corpus = existing_corpus
    else:
        rag_corpus = _create_and_import_corpus(
            display_name,
            paths,
        )

    rag_retrieval_config = rag.RagRetrievalConfig(
        top_k=top_k,  # Optional
        filter=rag.Filter(vector_distance_threshold=0.5),  # Optional
    )
    return rag.retrieval_query(
        rag_resources=[
            rag.RagResource(
                rag_corpus=rag_corpus.name,
                # Optional: supply IDs from `rag.list_files()`.
                # rag_file_ids=["rag-file-1", "rag-file-2", ...],
            )
        ],
        text=query,
        rag_retrieval_config=rag_retrieval_config,
    )


fast = FastAgent("Google Vertex RAG - Index google drive id to RAG")


@fast.agent(
    name="vertex rag",
    function_tools=[mini_rag],
)
async def main():
    async with fast.run() as agent:
        result = await agent(
            f"Produce a short top 5 prioritized list about customer pain points. From RAG, select 50 relevant chunks about customer pain points. Deduplicate. Links: [name](<link>). Compact output. Drive ID: {SAMPLE_DRIVE}."
        )
        print(result)
        # await agent.interactive()


if __name__ == "__main__":
    asyncio.run(main())
Binary file modified hf-space-deployer.skill
Binary file not shown.
20 changes: 20 additions & 0 deletions plan/agent-card-origin-review.md
@@ -0,0 +1,20 @@
# Code Review: origin/feat/agent-card (pre-pull)

Branch status: on `feat/agent-card` with local uncommitted changes. The branch is behind `origin/feat/agent-card` by 4 commits; review is based on `git diff HEAD..origin/feat/agent-card` without pulling.

## Findings

- `src/fast_agent/agents/tool_agent.py:446` — `ToolExecutionHandler.on_tool_start` is called with `tool_use_id=None`. For ACP progress/streaming, this prevents correlating tool calls to the LLM `tool_use_id` (potential duplicate or unlinked tool notifications). If intentional, document it; otherwise plumb `tool_use_id` through `run_tools` → `call_tool`.
- `src/fast_agent/core/fastagent.py:446` and `src/fast_agent/core/fastagent.py:595` — CLI `--model` override is no longer applied to AgentCard-loaded agents. This changes precedence vs the RFC (CLI → AgentCard → config). If the intent is “AgentCard model wins,” update docs/spec; otherwise treat as regression.
- `src/fast_agent/agents/tool_agent.py:90` — `_clone_constructor_kwargs` passes `FastMCPTool` instances directly into clones. If any tool closures capture parent state (for example, `add_agent_tool`), clones will still dispatch to the original child template, not a per-clone graph. This may be fine, but it weakens the “detached per-call clones” story for nested agent-tools.

## Missing tests

- No test coverage for the new ToolExecutionHandler progress flow (`on_tool_start` / `on_tool_progress` / `on_tool_complete`) or for tool propagation into detached clones. A focused unit test would help guard ACP tool-progress behavior and clone tool availability.

## Questions / assumptions

- Is the CLI `--model` override supposed to lose precedence over AgentCard models now? If yes, update the RFC and CLI docs; if no, treat as a regression.
- Should local tool execution be correlated to LLM `tool_use_id` in ACP progress? If yes, thread it through `run_tools`/`call_tool`.

If you want me to pull and merge the origin changes, tell me whether to commit or stash the local modifications first.
129 changes: 129 additions & 0 deletions plan/agentcard-standards-mini-article.md
@@ -0,0 +1,129 @@
# AgentCard at the Summit: The Multi-Agent Standardization Revolution
By github.com/evalstate, github.com/iqdoctor
Draft

LLM platforms have been climbing a clear ladder: plain text completions, chat completions, tool invocations, and then the MCP revolution. Each step widened what models could do, but also revealed a new bottleneck. Tools brought power, and MCP brought connectivity, yet we still struggle with large, brittle wrappers, heavy context costs, and inconsistent packaging. The next plateau is not just more tools. It is standardization: skills that can move, compose, and scale. AgentCard is a credible summit for that climb.

## The hidden cost of "just add tools"

MCP made it easy to plug in any tool server, but the price shows up immediately in context. Large tool schemas can consume tens of thousands of tokens before the agent does any real work. This is not just waste. It weakens continuity, degrades selection accuracy, and forces frequent context resets. The fast-agent "advanced tool use" proposal responds to the same pain Anthropic highlighted: stop loading everything up front and move to on-demand discovery and selective schema hydration.

That perspective reframes the problem: a tool surface should be thin and dynamic. A proxy or runtime can expose only a minimal meta-tool interface (discover, learn, execute), and load full schemas only when required. This reduces context bloat, improves correctness, and aligns with least-privilege configuration using AgentCard-style allowlists.
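
As a rough sketch of that surface (the `ToolIndex` class and its method names are assumptions for illustration, not fast-agent or MCP APIs):

```python
# Illustrative sketch only: a minimal "meta-tool" surface with three entry points.
from dataclasses import dataclass, field


@dataclass
class ToolIndex:
    # name -> (one-line summary, full JSON schema); only summaries stay in context
    catalog: dict[str, tuple[str, dict]] = field(default_factory=dict)

    def discover(self, query: str) -> list[str]:
        """Return tool names whose summary mentions the query (cheap, no schemas)."""
        return [n for n, (summary, _) in self.catalog.items() if query.lower() in summary.lower()]

    def learn(self, name: str) -> dict:
        """Hydrate the full schema for a single tool, only when the agent asks for it."""
        return self.catalog[name][1]

    def execute(self, name: str, arguments: dict) -> str:
        """Dispatch to the real backend (stubbed here)."""
        return f"called {name} with {arguments}"
```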

## AgentCard RFC
RFC reference: <https://github.com/evalstate/fast-agent/blob/main/plan/agent-card-rfc.md>

This direction tracks real-world practice: many platforms and developers already store prompts in Markdown files, and many of those pair the prompt body with YAML frontmatter to instantiate an agent. The RFC is grounded in the fast-agent.ai workflow system and is informed by Anthropic’s “Building Effective Agents” (<https://www.anthropic.com/engineering/building-effective-agents>) and the OpenAI “Agents as Tools” paradigm (<https://openai.github.io/openai-agents-python/tools/#agents-as-tools>), which lets user-defined agents call each other. That combination is why the spec aims not just for simplicity but also for completeness. The RFC documents MCP integration, defers function tools to a separate spec, and describes tool-filtering methods for saving context.

## What the RFC nails down (concrete, not vibes)

AgentCard is a text-first format (`.md` or `.yaml`) that compiles into a single canonical IR: `AgentConfig`. The RFC defines a strict surface:

- One card per file by default; multi-card files are optional/experimental.
- Strict validation by `type`; unknown fields are rejected.
- Minimal attributes: `type`, `name`, `instruction`.
- `description` is optional and becomes the tool description when agents are exposed as tools.
- `instruction` is either the body or the `instruction` field (never both).
- `schema_version` is optional (int); defaults to 1.
- Runtime wiring fields are explicit: `servers` select MCP endpoints by name, `tools` allowlist tools per server, `agents` declare child agents for routing/orchestration, and `messages` point to external history files.
- History preload formats are defined (JSON PromptMessageExtended, or delimited text/Markdown with role markers).
- Supported types are `agent`, `chain`, `parallel`, `evaluator_optimizer`, `router`, `orchestrator`, `iterative_planner`, and `MAKER`, covering the base agent and the specialized workflows.

AgentCard is not the same thing as a skill. The RFC draws the line clearly: a **Skill** is a reusable prompt fragment or capability description, while an **AgentCard** is a full runtime configuration (model, servers, tools, history source, instruction). That makes AgentCard the manifest layer above skills.
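
To make the compile step concrete, here is a minimal sketch of turning a Markdown card into a plain config dict, following the field rules summarized above; the `load_agent_card` helper is an illustration, not the RFC's reference loader.

```python
# Sketch: split YAML frontmatter from the Markdown body and merge into one config.
import yaml  # pip install pyyaml


def load_agent_card(text: str) -> dict:
    _, frontmatter, body = text.split("---", 2)
    config = yaml.safe_load(frontmatter) or {}
    if "instruction" in config and body.strip():
        raise ValueError("instruction must be the body or the field, never both")
    config.setdefault("instruction", body.strip())
    config.setdefault("schema_version", 1)
    return config


card = """---
type: agent
name: sizer
---
Given an object, respond only with an estimate of its size.
"""
print(load_agent_card(card))
```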

## Skills as portable expertise

The video "Don't Build Agents, Build Skills Instead" (https://www.youtube.com/watch?v=CEvIs9y1uog) frames the deeper issue: agents are smart but not expert. Intelligence without procedural knowledge leads to fragile outcomes. The proposed solution is simple and powerful: skills are just folders. A skill is a redistributable package of code, prompts, and documentation, with a progressive disclosure model where the agent reads only the small metadata until it needs deeper instructions.

This turns expertise into something tangible: versioned in Git, shared in a ZIP, and improved over time. It also flips the cost of context. Instead of flooding the model with everything, we keep a tiny surface in memory and pull in the rest only when a task truly needs it.
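
A small sketch of that progressive-disclosure loop, assuming one folder per skill with a `SKILL.md` whose first line is the summary metadata:

```python
# Sketch of "skills are just folders": read only the summary line up front,
# pull the full instructions when a task actually needs them.
from pathlib import Path


def skill_summaries(skills_dir: str) -> dict[str, str]:
    """Load only the first line of each SKILL.md so the in-context surface stays tiny."""
    summaries: dict[str, str] = {}
    for skill_md in Path(skills_dir).glob("*/SKILL.md"):
        lines = skill_md.read_text(encoding="utf-8").splitlines()
        summaries[skill_md.parent.name] = lines[0] if lines else ""
    return summaries


def skill_body(skills_dir: str, name: str) -> str:
    """Load the full instructions for one skill on demand."""
    return (Path(skills_dir) / name / "SKILL.md").read_text(encoding="utf-8")
```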

## Spec snapshots (from the RFC)

Basic Markdown card:

```md
---
type: agent
name: sizer
---
Given an object, respond only with an estimate of its size.
```

Agent with servers, tools, and child agents:

```md
---
type: agent
name: PMO-orchestrator
servers:
- time
- github
agents:
- NY-Project-Manager
- London-Project-Manager
tools:
  time: [get_time]
  github: [search_*]
---
Get reports. Always use one tool call per project/news.
Responsibilities: NY projects: [OpenAI, Fast-Agent, Anthropic].
London news: [Economics, Art, Culture].
Aggregate results and add a one-line PMO summary.
```

Agent with external history preload:

```md
---
type: agent
name: analyst
messages: ./history.md
---
You are a concise analyst.
```

## From SKILL.md to AgentCard

If SKILL.md is the minimal contract for a redistributable skill set (meta + prompt + code), then AgentCard is the next abstraction: meta + agent prompt + skills + workflows. It does not replace skills; it composes them. In that sense, AgentCard is to agent systems what a package manifest is to a codebase: a stable description of dependencies, behaviors, and integration points.

Think of the historical ladder:

- LLM completions -> chat completions -> tool invocations
- MCP as a connectivity standard
- SKILL.md as a portable skill container
- AgentCard as a higher-order manifest: prompts, skills, workflows, policies

Each step reduces friction in a different layer. SKILL.md makes expertise portable. AgentCard makes multi-agent systems interoperable.

## Why AgentCard looks like the summit

Standardization only works if the surface is small and composable. AgentCard aims for that. It can point to skills, define workflows, and express policies without re-encoding the world in a giant wrapper. That keeps implementations lean and lowers the cost of maintenance. It also aligns with the fast-agent view: keep tool surfaces small, discover capabilities on demand, and avoid pushing every schema into the context window.

Once you adopt a minimal AgentCard surface, a new distribution model emerges. Imagine an Agent Archive (AAR) that bundles:

- AgentCard metadata
- SKILL.md folders
- workflows and examples
- optional tests and evaluation scripts

This is the agent-era analog of a Java JAR: a single file that can be shared, versioned, and executed by any compatible runtime.
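
If the archive really is just a zip with a known layout (an assumption, since no AAR format has been published), packaging one is a few lines; the `.aar` extension and the `build_aar` helper below are hypothetical.

```python
# Hypothetical packaging sketch: bundle an agent directory into one shareable archive.
import zipfile
from pathlib import Path


def build_aar(agent_dir: str, out_path: str) -> None:
    """Zip AgentCard, skill folders, workflows, and tests with their relative paths."""
    root = Path(agent_dir)
    with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for path in root.rglob("*"):
            if path.is_file():
                archive.write(path, path.relative_to(root))


# build_aar("./my-agent", "my-agent.aar")
```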

## A practical engine for the next phase

The fast-agent runtime illustrates how the engine can do the heavy lifting. It can filter tools by policy, defer schema loading, and proxy multiple MCP servers while exposing a tiny model-facing surface. That matters because it keeps the agent lightweight and the system scalable.
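
For the filtering part, a minimal sketch of how per-server allowlists like those in the PMO card above (exact names plus glob patterns such as `search_*`) could be applied; the `filter_tools` helper is illustrative, not the fast-agent implementation.

```python
# Sketch: keep only tools that match the per-server allowlist; no allowlist means allow all.
from fnmatch import fnmatch


def filter_tools(available: list[str], allowlist: list[str] | None) -> list[str]:
    if allowlist is None:
        return available
    return [tool for tool in available if any(fnmatch(tool, pattern) for pattern in allowlist)]


# filter_tools(["search_issues", "create_pr"], ["search_*"]) -> ["search_issues"]
```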

The net effect is the same pattern we already see in engineering: smaller interfaces, more reusable components, and better performance. In practice, function tools often cut both development time and context usage compared to a traditional MCP-only loop, especially when combined with skills and on-demand discovery.
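
The pattern is the one the examples/rag script in this PR uses: hand a plain Python function to the agent via `function_tools` and let its signature act as the tool schema. A trimmed sketch (the `word_count` tool is a made-up example):

```python
import asyncio

from fast_agent import FastAgent

fast = FastAgent("function-tool demo")


def word_count(text: str) -> int:
    """Count whitespace-separated words in a text snippet."""
    return len(text.split())


@fast.agent(
    name="counter",
    function_tools=[word_count],
)
async def main():
    async with fast.run() as agent:
        print(await agent("How many words are in 'the quick brown fox jumps'?"))


if __name__ == "__main__":
    asyncio.run(main())
```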

## Conclusion

We are watching a standardization revolution unfold. LLMs brought general intelligence; MCP connected them to the world; skills turned experience into reusable code. AgentCard can be the summit that ties it all together: a minimal, portable interface that composes skills, workflows, and policies into a system others can run and extend.

If SKILL.md makes expertise portable, AgentCard makes multi-agent systems interoperable. That is not just a nice idea. It is the prerequisite for redistribution, reuse, and scale.

## References

- AgentCard RFC: <https://github.com/evalstate/fast-agent/blob/main/plan/agent-card-rfc.md>
- Model Context Protocol: <https://modelcontextprotocol.io>
- Anthropic: Building Effective Agents: <https://www.anthropic.com/engineering/building-effective-agents>
- OpenAI Agents SDK: Agents as Tools: <https://openai.github.io/openai-agents-python/tools/#agents-as-tools>
- Video: "Don't Build Agents, Build Skills Instead" <https://www.youtube.com/watch?v=CEvIs9y1uog>
11 changes: 10 additions & 1 deletion src/fast_agent/core/direct_decorators.py
@@ -19,7 +19,12 @@
from mcp.client.session import ElicitationFnT
from pydantic import AnyUrl

from fast_agent.agents.agent_types import AgentConfig, AgentType, SkillConfig
from fast_agent.agents.agent_types import (
AgentConfig,
AgentType,
FunctionToolsConfig,
SkillConfig,
)
from fast_agent.agents.workflow.iterative_planner import ITERATIVE_PLAN_SYSTEM_PROMPT_TEMPLATE
from fast_agent.agents.workflow.router_agent import (
ROUTING_SYSTEM_INSTRUCTION,
@@ -228,6 +233,7 @@ def decorator(func: Callable[P, Coroutine[Any, Any, R]]) -> Callable[P, Coroutin
default=default,
elicitation_handler=extra_kwargs.get("elicitation_handler"),
api_key=extra_kwargs.get("api_key"),
function_tools=extra_kwargs.get("function_tools"),
)

# Update request params if provided
@@ -271,6 +277,7 @@ def agent(
resources: dict[str, list[str]] | None = None,
prompts: dict[str, list[str]] | None = None,
skills: SkillConfig = SKILLS_DEFAULT,
function_tools: FunctionToolsConfig = None,
model: str | None = None,
use_history: bool = True,
request_params: RequestParams | None = None,
@@ -294,6 +301,7 @@ def agent(
tools: Optional list of tool names or patterns to include
resources: Optional list of resource names or patterns to include
prompts: Optional list of prompt names or patterns to include
function_tools: Optional list of Python function tools to include
model: Model specification string
use_history: Whether to maintain conversation history
request_params: Additional request parameters for the LLM
Expand Down Expand Up @@ -327,6 +335,7 @@ def agent(
resources=resources,
prompts=prompts,
skills=skills,
function_tools=function_tools,
api_key=api_key,
agents_as_tools_options={
"history_mode": history_mode,