8 changes: 8 additions & 0 deletions examples/rag/fastagent.secrets.yaml
@@ -0,0 +1,8 @@
# Vertex RAG Supported Regions:
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-overview#supported-regions

google:
  vertex_ai:
    enabled: true
    project_id: strato-space-ai # Your project
    location: europe-west4 # Vertex RAG Supported Regions
17 changes: 17 additions & 0 deletions examples/rag/pyproject.toml
@@ -0,0 +1,17 @@
[project]
name = "fast-agent-rag"
version = "0.1.0"
description = "fast-agent Vertex RAG example"
readme = "README.md"
requires-python = ">=3.13.5"
dependencies = [
"fast-agent-mcp",
"google-cloud-aiplatform",
"google-api-python-client",
]

[tool.uv]
package = true

[tool.uv.sources]
fast-agent-mcp = { path = "../..", editable = true }
167 changes: 167 additions & 0 deletions examples/rag/vertex-rag.py
@@ -0,0 +1,167 @@
import asyncio

import google.auth
import vertexai
from googleapiclient.discovery import build
from vertexai import rag

from fast_agent import FastAgent
from fast_agent.config import get_settings

# RAG quickstart: Required roles, Prepare your Google Cloud console, Run Vertex AI RAG Engine
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-quickstart
#
# Vertex AI RAG Engine overview: Overview, Supported regions, ...
# https://docs.cloud.google.com/vertex-ai/generative-ai/docs/rag-engine/rag-overview
#
# Install the Vertex AI SDK for Python
# https://docs.cloud.google.com/vertex-ai/docs/start/install-sdk
#
# Admin console
# https://console.cloud.google.com/vertex-ai/rag
# Create a RAG Corpus, Import Files, and Generate a response
# uv pip install google-api-python-client

# TODO(developer): Update PROJECT_ID and LOCATION in fastagent.secrets.yaml
CONFIG_PATH = "fastagent.secrets.yaml"

# google:
#   vertex_ai:
#     enabled: true
#     project_id: strato-space-ai # Your project
#     location: europe-west4 # Netherlands, use Vertex RAG supported regions

_settings = get_settings(CONFIG_PATH)
_vertex_ai = getattr(_settings.google, "vertex_ai", {}) if _settings.google else {}
PROJECT_ID = _vertex_ai.get("project_id")
LOCATION = _vertex_ai.get("location")

# Configure embedding model, for example "text-embedding-005".
EMBEDDING_MODEL = "text-embedding-005"
SCOPES = ["https://www.googleapis.com/auth/drive.readonly"]

SAMPLE_DRIVE = "1J3ubtdkmFuWDjfW3_qT2Fhsdn2pbtv-8"

if not PROJECT_ID or not LOCATION:
    raise ValueError(
        "Missing google.vertex_ai.project_id/location in fastagent.secrets.yaml"
    )


def _drive_folder_name(folder_id: str) -> str:
    credentials, _ = google.auth.default(scopes=SCOPES)
    drive_service = build("drive", "v3", credentials=credentials)
    payload = (
        drive_service.files()
        .get(
            fileId=folder_id,
            fields="id,name,mimeType",
            supportsAllDrives=True,
        )
        .execute()
    )
    return payload["name"]


# Initialize Vertex AI API once per session
# us-central1/us-east4 require allowlist; default to a GA region.

_vertex_initialized = False


def _ensure_vertexai_init() -> None:
    global _vertex_initialized
    if not _vertex_initialized:
        vertexai.init(project=PROJECT_ID, location=LOCATION)
        _vertex_initialized = True


def _create_and_import_corpus(
    display_name: str,
    paths: list[str],
) -> rag.RagCorpus:
    embedding_model_config = rag.RagEmbeddingModelConfig(
        vertex_prediction_endpoint=rag.VertexPredictionEndpoint(
            publisher_model=f"publishers/google/models/{EMBEDDING_MODEL}"
        )
    )
    rag_corpus = rag.create_corpus(
        display_name=display_name,
        backend_config=rag.RagVectorDbConfig(
            rag_embedding_model_config=embedding_model_config
        ),
    )
    rag.import_files(
        rag_corpus.name,
        paths,
        # Optional
        transformation_config=rag.TransformationConfig(
            chunking_config=rag.ChunkingConfig(
                chunk_size=512,
                chunk_overlap=100,
            ),
        ),
        max_embedding_requests_per_min=1000,  # Optional
    )
    return rag_corpus


def mini_rag(query: str, drive_id: str, top_k: int) -> object:
    _ensure_vertexai_init()
    if not drive_id:
        raise ValueError("drive_id must be a non-empty Google Drive ID.")

    paths = [f"https://drive.google.com/drive/folders/{drive_id}"]
    folder_name = _drive_folder_name(drive_id)
    key = drive_id
    display_name = f"{folder_name} | {key}"

    existing_corpus = None
    for corpus in rag.list_corpora():
        if corpus.display_name and key in corpus.display_name:
            existing_corpus = corpus
            break
    if existing_corpus:
        rag_corpus = existing_corpus
    else:
        rag_corpus = _create_and_import_corpus(
            display_name,
            paths,
        )

    rag_retrieval_config = rag.RagRetrievalConfig(
        top_k=top_k,  # Optional
        filter=rag.Filter(vector_distance_threshold=0.5),  # Optional
    )
    return rag.retrieval_query(
        rag_resources=[
            rag.RagResource(
                rag_corpus=rag_corpus.name,
                # Optional: supply IDs from `rag.list_files()`.
                # rag_file_ids=["rag-file-1", "rag-file-2", ...],
            )
        ],
        text=query,
        rag_retrieval_config=rag_retrieval_config,
    )


fast = FastAgent("Google Vertex RAG - Index google drive id to RAG")


@fast.agent(
    name="vertex rag",
    function_tools=[mini_rag],
)
async def main():
    async with fast.run() as agent:
        result = await agent(
            f"Produce a short top 5 prioritized list about customer pain points. From RAG, select 50 relevant chunks about customer pain points. Deduplicate. Links: [name](<link>). Compact output. Drive ID: {SAMPLE_DRIVE}."
        )
        print(result)
        # await agent.interactive()


if __name__ == "__main__":
    asyncio.run(main())
Binary file modified hf-space-deployer.skill
Binary file not shown.
20 changes: 20 additions & 0 deletions plan/agent-card-origin-review.md
@@ -0,0 +1,20 @@
# Code Review: origin/feat/agent-card (pre-pull)

Branch status: on `feat/agent-card` with local uncommitted changes. The branch is behind `origin/feat/agent-card` by 4 commits; review is based on `git diff HEAD..origin/feat/agent-card` without pulling.

## Findings

- `src/fast_agent/agents/tool_agent.py:446` — `ToolExecutionHandler.on_tool_start` is called with `tool_use_id=None`. For ACP progress/streaming, this prevents correlating tool calls to the LLM `tool_use_id` (potential duplicate or unlinked tool notifications). If intentional, document it; otherwise plumb `tool_use_id` through `run_tools` → `call_tool`.
- `src/fast_agent/core/fastagent.py:446` and `src/fast_agent/core/fastagent.py:595` — CLI `--model` override is no longer applied to AgentCard-loaded agents. This changes precedence vs the RFC (CLI → AgentCard → config). If the intent is “AgentCard model wins,” update docs/spec; otherwise treat as regression.
- `src/fast_agent/agents/tool_agent.py:90` — `_clone_constructor_kwargs` passes `FastMCPTool` instances directly into clones. If any tool closures capture parent state (for example, `add_agent_tool`), clones will still dispatch to the original child template, not a per-clone graph. This may be fine, but it weakens the “detached per-call clones” story for nested agent-tools.

## Missing tests

- No test coverage for the new ToolExecutionHandler progress flow (`on_tool_start` / `on_tool_progress` / `on_tool_complete`) or for tool propagation into detached clones. A focused unit test would help guard ACP tool-progress behavior and clone tool availability.

## Questions / assumptions

- Is the CLI `--model` override supposed to lose precedence over AgentCard models now? If yes, update the RFC and CLI docs; if no, treat as a regression.
- Should local tool execution be correlated to LLM `tool_use_id` in ACP progress? If yes, thread it through `run_tools`/`call_tool`.

If you want me to pull and merge the origin changes, tell me whether to commit or stash the local modifications first.
129 changes: 129 additions & 0 deletions plan/agentcard-standards-mini-article.md
@@ -0,0 +1,129 @@
# AgentCard at the Summit: The Multi-Agent Standardization Revolution
By github.com/evalstate, github.com/iqdoctor
Draft

LLM platforms have been climbing a clear ladder: plain text completions, chat completions, tool invocations, and then the MCP revolution. Each step widened what models could do, but also revealed a new bottleneck. Tools brought power, and MCP brought connectivity, yet we still struggle with large, brittle wrappers, heavy context costs, and inconsistent packaging. The next plateau is not just more tools. It is standardization: skills that can move, compose, and scale. AgentCard is a credible summit for that climb.

## The hidden cost of "just add tools"

MCP made it easy to plug in any tool server, but the price shows up immediately in context. Large tool schemas can consume tens of thousands of tokens before the agent does any real work. This is not just waste. It weakens continuity, degrades selection accuracy, and forces frequent context resets. The fast-agent "advanced tool use" proposal responds to the same pain Anthropic highlighted: stop loading everything up front and move to on-demand discovery and selective schema hydration.

That perspective reframes the problem: a tool surface should be thin and dynamic. A proxy or runtime can expose only a minimal meta-tool interface (discover, learn, execute), and load full schemas only when required. This reduces context bloat, improves correctness, and aligns with least-privilege configuration using AgentCard-style allowlists.
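
As a rough sketch of that surface (the `ToolIndex` class and its method names are assumptions for illustration, not fast-agent or MCP APIs):

```python
# Illustrative sketch only: a minimal "meta-tool" surface with three entry points.
from dataclasses import dataclass, field


@dataclass
class ToolIndex:
    # name -> (one-line summary, full JSON schema); only summaries stay in context
    catalog: dict[str, tuple[str, dict]] = field(default_factory=dict)

    def discover(self, query: str) -> list[str]:
        """Return tool names whose summary mentions the query (cheap, no schemas)."""
        return [n for n, (summary, _) in self.catalog.items() if query.lower() in summary.lower()]

    def learn(self, name: str) -> dict:
        """Hydrate the full schema for a single tool, only when the agent asks for it."""
        return self.catalog[name][1]

    def execute(self, name: str, arguments: dict) -> str:
        """Dispatch to the real backend (stubbed here)."""
        return f"called {name} with {arguments}"
```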

## AgentCard RFC
RFC reference: <https://github.com/evalstate/fast-agent/blob/main/plan/agent-card-rfc.md>

This direction tracks real-world practice: many platforms and developers already store prompts in Markdown files, and many of those pair the prompt body with YAML frontmatter to instantiate an agent. The RFC is grounded in the fast-agent.ai workflow system and is informed by Anthropic’s “Building Effective Agents” (<https://www.anthropic.com/engineering/building-effective-agents>) and the OpenAI “Agents as Tools” paradigm (<https://openai.github.io/openai-agents-python/tools/#agents-as-tools>), which lets user-defined agents call each other. That combination is why the spec aims not just for simplicity but also for completeness. The RFC documents MCP integration, defers function tools to a separate spec, and describes tool-filtering methods for saving context.

## What the RFC nails down (concrete, not vibes)

AgentCard is a text-first format (`.md` or `.yaml`) that compiles into a single canonical IR: `AgentConfig`. The RFC defines a strict surface:

- One card per file by default; multi-card files are optional/experimental.
- Strict validation by `type`; unknown fields are rejected.
- Minimal attributes: `type`, `name`, `instruction`.
- `description` is optional and becomes the tool description when agents are exposed as tools.
- `instruction` is either the body or the `instruction` field (never both).
- `schema_version` is optional (int); defaults to 1.
- Runtime wiring fields are explicit: `servers` select MCP endpoints by name, `tools` allowlist tools per server, `agents` declare child agents for routing/orchestration, and `messages` point to external history files.
- History preload formats are defined (JSON PromptMessageExtended, or delimited text/Markdown with role markers).
- Supported types are `agent`, `chain`, `parallel`, `evaluator_optimizer`, `router`, `orchestrator`, `iterative_planner`, and `MAKER`, covering the base agent and the specialized workflows.

AgentCard is not the same thing as a skill. The RFC draws the line clearly: a **Skill** is a reusable prompt fragment or capability description, while an **AgentCard** is a full runtime configuration (model, servers, tools, history source, instruction). That makes AgentCard the manifest layer above skills.
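
To make the compile step concrete, here is a minimal sketch of turning a Markdown card into a plain config dict, following the field rules summarized above; the `load_agent_card` helper is an illustration, not the RFC's reference loader.

```python
# Sketch: split YAML frontmatter from the Markdown body and merge into one config.
import yaml  # pip install pyyaml


def load_agent_card(text: str) -> dict:
    _, frontmatter, body = text.split("---", 2)
    config = yaml.safe_load(frontmatter) or {}
    if "instruction" in config and body.strip():
        raise ValueError("instruction must be the body or the field, never both")
    config.setdefault("instruction", body.strip())
    config.setdefault("schema_version", 1)
    return config


card = """---
type: agent
name: sizer
---
Given an object, respond only with an estimate of its size.
"""
print(load_agent_card(card))
```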

## Skills as portable expertise

The video "Don't Build Agents, Build Skills Instead" (https://www.youtube.com/watch?v=CEvIs9y1uog) frames the deeper issue: agents are smart but not expert. Intelligence without procedural knowledge leads to fragile outcomes. The proposed solution is simple and powerful: skills are just folders. A skill is a redistributable package of code, prompts, and documentation, with a progressive disclosure model where the agent reads only the small metadata until it needs deeper instructions.

This turns expertise into something tangible: versioned in Git, shared in a ZIP, and improved over time. It also flips the cost of context. Instead of flooding the model with everything, we keep a tiny surface in memory and pull in the rest only when a task truly needs it.
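
A small sketch of that progressive-disclosure loop, assuming one folder per skill with a `SKILL.md` whose first line is the summary metadata:

```python
# Sketch of "skills are just folders": read only the summary line up front,
# pull the full instructions when a task actually needs them.
from pathlib import Path


def skill_summaries(skills_dir: str) -> dict[str, str]:
    """Load only the first line of each SKILL.md so the in-context surface stays tiny."""
    summaries: dict[str, str] = {}
    for skill_md in Path(skills_dir).glob("*/SKILL.md"):
        lines = skill_md.read_text(encoding="utf-8").splitlines()
        summaries[skill_md.parent.name] = lines[0] if lines else ""
    return summaries


def skill_body(skills_dir: str, name: str) -> str:
    """Load the full instructions for one skill on demand."""
    return (Path(skills_dir) / name / "SKILL.md").read_text(encoding="utf-8")
```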

## Spec snapshots (from the RFC)

Basic Markdown card:

```md
---
type: agent
name: sizer
---
Given an object, respond only with an estimate of its size.
```

Agent with servers, tools, and child agents:

```md
---
type: agent
name: PMO-orchestrator
servers:
- time
- github
agents:
- NY-Project-Manager
- London-Project-Manager
tools:
  time: [get_time]
  github: [search_*]
---
Get reports. Always use one tool call per project/news.
Responsibilities: NY projects: [OpenAI, Fast-Agent, Anthropic].
London news: [Economics, Art, Culture].
Aggregate results and add a one-line PMO summary.
```

Agent with external history preload:

```md
---
type: agent
name: analyst
messages: ./history.md
---
You are a concise analyst.
```

## From SKILL.md to AgentCard

If SKILL.md is the minimal contract for a redistributable skill set (meta + prompt + code), then AgentCard is the next abstraction: meta + agent prompt + skills + workflows. It does not replace skills; it composes them. In that sense, AgentCard is to agent systems what a package manifest is to a codebase: a stable description of dependencies, behaviors, and integration points.

Think of the historical ladder:

- LLM completions -> chat completions -> tool invocations
- MCP as a connectivity standard
- SKILL.md as a portable skill container
- AgentCard as a higher-order manifest: prompts, skills, workflows, policies

Each step reduces friction in a different layer. SKILL.md makes expertise portable. AgentCard makes multi-agent systems interoperable.

## Why AgentCard looks like the summit

Standardization only works if the surface is small and composable. AgentCard aims for that. It can point to skills, define workflows, and express policies without re-encoding the world in a giant wrapper. That keeps implementations lean and lowers the cost of maintenance. It also aligns with the fast-agent view: keep tool surfaces small, discover capabilities on demand, and avoid pushing every schema into the context window.

Once you adopt a minimal AgentCard surface, a new distribution model emerges. Imagine an Agent Archive (AAR) that bundles:

- AgentCard metadata
- SKILL.md folders
- workflows and examples
- optional tests and evaluation scripts

This is the agent-era analog of a Java JAR: a single file that can be shared, versioned, and executed by any compatible runtime.
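
If the archive really is just a zip with a known layout (an assumption, since no AAR format has been published), packaging one is a few lines; the `.aar` extension and the `build_aar` helper below are hypothetical.

```python
# Hypothetical packaging sketch: bundle an agent directory into one shareable archive.
import zipfile
from pathlib import Path


def build_aar(agent_dir: str, out_path: str) -> None:
    """Zip AgentCard, skill folders, workflows, and tests with their relative paths."""
    root = Path(agent_dir)
    with zipfile.ZipFile(out_path, "w", zipfile.ZIP_DEFLATED) as archive:
        for path in root.rglob("*"):
            if path.is_file():
                archive.write(path, path.relative_to(root))


# build_aar("./my-agent", "my-agent.aar")
```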

## A practical engine for the next phase

The fast-agent runtime illustrates how the engine can do the heavy lifting. It can filter tools by policy, defer schema loading, and proxy multiple MCP servers while exposing a tiny model-facing surface. That matters because it keeps the agent lightweight and the system scalable.
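
For the filtering part, a minimal sketch of how per-server allowlists like those in the PMO card above (exact names plus glob patterns such as `search_*`) could be applied; the `filter_tools` helper is illustrative, not the fast-agent implementation.

```python
# Sketch: keep only tools that match the per-server allowlist; no allowlist means allow all.
from fnmatch import fnmatch


def filter_tools(available: list[str], allowlist: list[str] | None) -> list[str]:
    if allowlist is None:
        return available
    return [tool for tool in available if any(fnmatch(tool, pattern) for pattern in allowlist)]


# filter_tools(["search_issues", "create_pr"], ["search_*"]) -> ["search_issues"]
```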

The net effect is the same pattern we already see in engineering: smaller interfaces, more reusable components, and better performance. In practice, function tools often cut both development time and context usage compared to a traditional MCP-only loop, especially when combined with skills and on-demand discovery.
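
The pattern is the one the examples/rag script in this PR uses: hand a plain Python function to the agent via `function_tools` and let its signature act as the tool schema. A trimmed sketch (the `word_count` tool is a made-up example):

```python
import asyncio

from fast_agent import FastAgent

fast = FastAgent("function-tool demo")


def word_count(text: str) -> int:
    """Count whitespace-separated words in a text snippet."""
    return len(text.split())


@fast.agent(
    name="counter",
    function_tools=[word_count],
)
async def main():
    async with fast.run() as agent:
        print(await agent("How many words are in 'the quick brown fox jumps'?"))


if __name__ == "__main__":
    asyncio.run(main())
```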

## Conclusion

We are watching a standardization revolution unfold. LLMs brought general intelligence; MCP connected them to the world; skills turned experience into reusable code. AgentCard can be the summit that ties it all together: a minimal, portable interface that composes skills, workflows, and policies into a system others can run and extend.

If SKILL.md makes expertise portable, AgentCard makes multi-agent systems interoperable. That is not just a nice idea. It is the prerequisite for redistribution, reuse, and scale.

## References

- AgentCard RFC: <https://github.com/evalstate/fast-agent/blob/main/plan/agent-card-rfc.md>
- Model Context Protocol: <https://modelcontextprotocol.io>
- Anthropic: Building Effective Agents: <https://www.anthropic.com/engineering/building-effective-agents>
- OpenAI Agents SDK: Agents as Tools: <https://openai.github.io/openai-agents-python/tools/#agents-as-tools>
- Video: "Don't Build Agents, Build Skills Instead" <https://www.youtube.com/watch?v=CEvIs9y1uog>
11 changes: 10 additions & 1 deletion src/fast_agent/core/direct_decorators.py
@@ -19,7 +19,12 @@
from mcp.client.session import ElicitationFnT
from pydantic import AnyUrl

from fast_agent.agents.agent_types import AgentConfig, AgentType, SkillConfig
from fast_agent.agents.agent_types import (
AgentConfig,
AgentType,
FunctionToolsConfig,
SkillConfig,
)
from fast_agent.agents.workflow.iterative_planner import ITERATIVE_PLAN_SYSTEM_PROMPT_TEMPLATE
from fast_agent.agents.workflow.router_agent import (
ROUTING_SYSTEM_INSTRUCTION,
@@ -228,6 +233,7 @@ def decorator(func: Callable[P, Coroutine[Any, Any, R]]) -> Callable[P, Coroutin
default=default,
elicitation_handler=extra_kwargs.get("elicitation_handler"),
api_key=extra_kwargs.get("api_key"),
function_tools=extra_kwargs.get("function_tools"),
)

# Update request params if provided
@@ -271,6 +277,7 @@ def agent(
resources: dict[str, list[str]] | None = None,
prompts: dict[str, list[str]] | None = None,
skills: SkillConfig = SKILLS_DEFAULT,
function_tools: FunctionToolsConfig = None,
model: str | None = None,
use_history: bool = True,
request_params: RequestParams | None = None,
@@ -294,6 +301,7 @@ def agent(
tools: Optional list of tool names or patterns to include
resources: Optional list of resource names or patterns to include
prompts: Optional list of prompt names or patterns to include
function_tools: Optional list of Python function tools to include
model: Model specification string
use_history: Whether to maintain conversation history
request_params: Additional request parameters for the LLM
Expand Down Expand Up @@ -327,6 +335,7 @@ def agent(
resources=resources,
prompts=prompts,
skills=skills,
function_tools=function_tools,
api_key=api_key,
agents_as_tools_options={
"history_mode": history_mode,