From 35ac4360355e84bfeafd107ae13beb29631405de Mon Sep 17 00:00:00 2001 From: Tapan Chugh Date: Mon, 24 Nov 2025 18:04:21 -0800 Subject: [PATCH 1/2] Update BFCL submodule to latest (06ae1c7) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Includes fix for holdout function index in missed_function test cases. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- tests/benchmarks/bfcl/data | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmarks/bfcl/data b/tests/benchmarks/bfcl/data index d85e9ad..06ae1c7 160000 --- a/tests/benchmarks/bfcl/data +++ b/tests/benchmarks/bfcl/data @@ -1 +1 @@ -Subproject commit d85e9ad3a3009be3d609c5550799389a16599f88 +Subproject commit 06ae1c763fab71a899730d33e674e4724c3af63c From 50bb99eb154ccbf7398e2c5863a7decaca786522 Mon Sep 17 00:00:00 2001 From: Tapan Chugh Date: Mon, 24 Nov 2025 22:04:49 -0800 Subject: [PATCH 2/2] Patch MCP tools with BFCL's function documentation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit FastMCP introspection doesn't extract parameter descriptions from docstrings into the JSON schema. BFCL provides pre-compiled JSON func docs with proper descriptions, so we now overlay them after registering tools. This ensures tool descriptions include the API prefix and parameter descriptions are properly in the schema where models expect them. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- tests/benchmarks/bfcl/mcp_server.py | 127 ++++++++++++++++++---------- 1 file changed, 84 insertions(+), 43 deletions(-) diff --git a/tests/benchmarks/bfcl/mcp_server.py b/tests/benchmarks/bfcl/mcp_server.py index 0dea610..26c9a12 100644 --- a/tests/benchmarks/bfcl/mcp_server.py +++ b/tests/benchmarks/bfcl/mcp_server.py @@ -12,40 +12,81 @@ import sys from typing import Any +from bfcl_eval.constants.eval_config import MULTI_TURN_FUNC_DOC_PATH from bfcl_eval.constants.executable_backend_config import ( CLASS_FILE_PATH_MAPPING, + MULTI_TURN_FUNC_DOC_FILE_MAPPING, STATELESS_CLASSES, ) from mcp.server.fastmcp import FastMCP -def load_api_class(target_class_name: str) -> Any: - """Load the specified API class dynamically and return instance.""" - if target_class_name not in CLASS_FILE_PATH_MAPPING: - raise ValueError(f"Unknown class: {target_class_name}") +def load_api_class(class_name: str) -> Any: + """Load and instantiate the specified API class.""" + module = importlib.import_module(CLASS_FILE_PATH_MAPPING[class_name]) + return getattr(module, class_name)() - # Load the class - module = importlib.import_module(CLASS_FILE_PATH_MAPPING[target_class_name]) - instance = getattr(module, target_class_name)() - return instance +def load_func_docs(class_name: str) -> dict[str, dict[str, Any]]: + """Load BFCL's function documentation for a class. -def load_scenario_from_test(test_file: str, test_id: str, target_class_name: str) -> dict[str, Any]: - """Load scenario configuration from test file.""" - scenario = {} - if test_file and test_id: - try: - with open(test_file) as f: - for line in f: - if line.strip(): - entry = json.loads(line) - if entry.get("id") == test_id: - if "initial_config" in entry and target_class_name in entry["initial_config"]: - scenario = entry["initial_config"][target_class_name] - break - except Exception as e: - print(f"Warning: Could not load scenario: {e}", file=sys.stderr) - return scenario + Returns a dict mapping function names to their full documentation, + including rich descriptions and parameter schemas. + """ + if class_name not in MULTI_TURN_FUNC_DOC_FILE_MAPPING: + return {} + + doc_path = MULTI_TURN_FUNC_DOC_PATH / MULTI_TURN_FUNC_DOC_FILE_MAPPING[class_name] + if not doc_path.exists(): + return {} + + docs = {} + with open(doc_path) as f: + for line in f: + if line.strip(): + doc = json.loads(line) + docs[doc["name"]] = doc + return docs + + +def load_scenario_from_test(test_file: str, test_id: str, class_name: str) -> dict[str, Any]: + """Load initial scenario configuration from a test file.""" + if not test_file or not test_id: + return {} + + with open(test_file) as f: + for line in f: + if line.strip(): + entry = json.loads(line) + if entry.get("id") == test_id: + config: dict[str, Any] = entry.get("initial_config", {}).get(class_name, {}) + return config + + return {} + + +def patch_tool_with_func_doc(server: FastMCP, func_docs: dict[str, dict[str, Any]]) -> None: + """Patch registered tools with BFCL's richer function documentation. + + FastMCP's introspection doesn't extract parameter descriptions from docstrings. + BFCL provides pre-compiled JSON docs with proper descriptions, so we overlay them. + """ + for tool_name, tool in server._tool_manager._tools.items(): + if tool_name not in func_docs: + continue + + doc = func_docs[tool_name] + + # Patch tool description + tool.description = doc.get("description", tool.description) + + # Patch parameter descriptions + doc_params = doc.get("parameters", {}).get("properties", {}) + tool_params = tool.parameters.get("properties", {}) + + for param_name, param_doc in doc_params.items(): + if param_name in tool_params and "description" in param_doc: + tool_params[param_name]["description"] = param_doc["description"] async def main() -> None: @@ -53,37 +94,37 @@ async def main() -> None: parser.add_argument("class_name", help="API class name to load") parser.add_argument("test_file", nargs="?", help="Test file path (optional)") parser.add_argument("test_id", nargs="?", help="Test ID (optional)") - args = parser.parse_args() - target_class_name = args.class_name + class_name = args.class_name - if target_class_name not in CLASS_FILE_PATH_MAPPING: - print("Usage: python api_server.py [test_file.json test_id]", file=sys.stderr) + if class_name not in CLASS_FILE_PATH_MAPPING: + print("Usage: python mcp_server.py [test_file.json test_id]", file=sys.stderr) print(f"Available classes: {', '.join(CLASS_FILE_PATH_MAPPING.keys())}", file=sys.stderr) sys.exit(1) - try: - api_instance = load_api_class(target_class_name) - print(f"Successfully loaded {target_class_name}", file=sys.stderr) + # Load the API class + api = load_api_class(class_name) + print(f"Loaded {class_name}", file=sys.stderr) - # Load scenario if needed - if hasattr(api_instance, "_load_scenario") and target_class_name not in STATELESS_CLASSES: - scenario = load_scenario_from_test(args.test_file, args.test_id, target_class_name) - api_instance._load_scenario(scenario) - except Exception as e: - print(f"Error loading {target_class_name}: {e}", file=sys.stderr) - sys.exit(1) + # Initialize scenario state if needed + if hasattr(api, "_load_scenario") and class_name not in STATELESS_CLASSES: + scenario = load_scenario_from_test(args.test_file, args.test_id, class_name) + api._load_scenario(scenario) - # Create FastMCP server - server = FastMCP(f"{target_class_name.lower()}-api") + # Load BFCL's function documentation + func_docs = load_func_docs(class_name) - # Register all API methods as tools - for method_name, method in inspect.getmembers(api_instance, predicate=inspect.ismethod): + # Create server and register tools + server = FastMCP(f"{class_name.lower()}-api") + + for method_name, method in inspect.getmembers(api, predicate=inspect.ismethod): if not method_name.startswith("_"): server.add_tool(method, name=method_name) - # Run the server + # Patch tools with BFCL's richer descriptions + patch_tool_with_func_doc(server, func_docs) + await server.run_stdio_async()