Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions scripts/build_tools.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#!/usr/bin/env python3
"""Build ToolUniverse tools."""

import sys
import argparse
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent.parent / "src"))


def main():
from tooluniverse.generate_tools import main as generate

parser = argparse.ArgumentParser(
description="Build ToolUniverse tools",
formatter_class=argparse.RawDescriptionHelpFormatter,
Expand All @@ -18,15 +20,16 @@ def main():
python scripts/build_tools.py --force # Force regenerate all tools
python scripts/build_tools.py --verbose # Show detailed change information
python scripts/build_tools.py --force -v # Force rebuild with verbose output
"""
""",
)
parser.add_argument(
"--force",
action="store_true",
help="Force regeneration of all tools regardless of changes detected",
)
parser.add_argument(
"--verbose", "-v",
"--verbose",
"-v",
action="store_true",
help="Print detailed change information for each tool",
)
Expand All @@ -35,17 +38,18 @@ def main():
action="store_true",
help="Skip formatting generated files",
)

args = parser.parse_args()

print("🔧 Building ToolUniverse tools...")
generate(
format_enabled=not args.no_format,
force_regenerate=args.force,
verbose=args.verbose
verbose=args.verbose,
)
print("✅ Build complete!")
return 0


if __name__ == "__main__":
sys.exit(main())
2 changes: 0 additions & 2 deletions scripts/check_tool_name_lengths.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,3 @@ def main(argv: List[str] | None = None) -> int:

if __name__ == "__main__":
sys.exit(main())


99 changes: 59 additions & 40 deletions scripts/test_new_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
4. Validates return results against return_schema
5. Reports success/failure
"""

import json
import os
import sys
Expand All @@ -23,6 +24,7 @@
try:
import jsonschema
from jsonschema import validate, ValidationError

JSONSCHEMA_AVAILABLE = True
except ImportError:
JSONSCHEMA_AVAILABLE = False
Expand All @@ -35,31 +37,37 @@ def load_config_from_file(config_path: str) -> list:
return json.load(f)


def validate_against_schema(
    data: Any, schema: Dict[str, Any]
) -> Tuple[bool, Optional[str]]:
    """
    Validate data against a JSON schema.

    Validation is skipped (and reported as valid) when the jsonschema
    package could not be imported or when no schema is supplied.

    Args:
        data: Data to validate
        schema: JSON schema to validate against

    Returns:
        Tuple of (is_valid, error_message); error_message is None when the
        data is valid or validation was skipped.
    """
    if not JSONSCHEMA_AVAILABLE:
        return True, None  # Skip validation if jsonschema not available

    if not schema:
        return True, None  # No schema to validate against

    try:
        validate(instance=data, schema=schema)
    except ValidationError as e:
        # absolute_path is empty when the failure is at the document root.
        error_path = (
            " -> ".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
        )
        error_msg = f"Schema validation failed at '{error_path}': {e.message}"
        if e.context:
            # Append the sub-errors exactly once, capped at three to keep
            # the report readable.
            error_msg += (
                f"\n Context: {', '.join(str(c.message) for c in e.context[:3])}"
            )
        return False, error_msg
    except Exception as e:
        # Anything else (e.g. a malformed schema) is reported, not raised,
        # so a single bad schema does not abort the whole test run.
        return False, f"Schema validation error: {str(e)}"
    return True, None
Expand All @@ -68,44 +76,46 @@ def validate_against_schema(data: Any, schema: Dict[str, Any]) -> Tuple[bool, Op
def extract_result_data(result: Dict[str, Any]) -> Any:
    """
    Extract the actual data from ToolUniverse result format.

    ToolUniverse may return results in different formats:
    - {"success": True, "data": {...}}
    - {"success": True, ...} (direct data)
    - The result itself if it's not a dict

    Args:
        result: Raw value returned by a tool call

    Returns:
        The payload to validate, or None when the result reports
        ``"success": False`` (error case, nothing to validate).
    """
    if not isinstance(result, dict):
        return result

    if result.get("success") is False:
        return None  # Error case, no data to validate

    # Prefer an explicit "data" field when the result carries one.
    if "data" in result:
        return result["data"]

    # Otherwise the payload is the result itself minus the status fields.
    envelope_keys = ("success", "error", "error_details")
    return {k: v for k, v in result.items() if k not in envelope_keys}


def test_tool_with_examples(
tu: ToolUniverse,
tool_name: str,
examples: list,
return_schema: Optional[Dict[str, Any]] = None
tu: ToolUniverse,
tool_name: str,
examples: list,
return_schema: Optional[Dict[str, Any]] = None,
):
"""Test a tool with its test examples and validate against return_schema."""
results = []
for idx, example in enumerate(examples):
try:
result = tu.run_one_function(
{"name": tool_name, "arguments": example}
)
result = tu.run_one_function({"name": tool_name, "arguments": example})
success = isinstance(result, dict) and result.get("success", False)

schema_valid = True
schema_error = None

if success and return_schema:
# Extract actual data from result
result_data = extract_result_data(result)
Expand All @@ -114,7 +124,7 @@ def test_tool_with_examples(
# wrap it appropriately or validate the inner data.data structure
schema_to_validate = return_schema
data_to_validate = result_data

# Check if schema expects status/url at root but we only have data
schema_root_props = return_schema.get("properties", {})
if "status" in schema_root_props and "data" in schema_root_props:
Expand All @@ -128,12 +138,14 @@ def test_tool_with_examples(
else:
# Wrap in expected structure (make status/url optional in validation)
pass # Try validating as-is first

schema_valid, schema_error = validate_against_schema(data_to_validate, schema_to_validate)

schema_valid, schema_error = validate_against_schema(
data_to_validate, schema_to_validate
)
else:
schema_valid = False
schema_error = "No data returned to validate"

results.append(
{
"example_idx": idx,
Expand Down Expand Up @@ -202,21 +214,27 @@ def main():
print(f"⚠️ {tool_name}: No test_examples found")
continue

schema_info = " (with schema validation)" if return_schema else " (no return_schema)"
print(f"\n🧪 Testing {tool_name} ({len(test_examples)} examples){schema_info}...")
results = test_tool_with_examples(tu, tool_name, test_examples, return_schema)
schema_info = (
" (with schema validation)" if return_schema else " (no return_schema)"
)
print(
f"\n🧪 Testing {tool_name} ({len(test_examples)} examples){schema_info}..."
)
results = test_tool_with_examples(
tu, tool_name, test_examples, return_schema
)

for r in results:
total_tests += 1
execution_pass = r["success"]
schema_pass = r.get("schema_valid", True)

# Track schema validation separately
if return_schema and execution_pass:
total_schema_tests += 1
if schema_pass:
total_schema_passed += 1

if execution_pass and schema_pass:
total_passed += 1
status_icon = "✅"
Expand All @@ -227,18 +245,18 @@ def main():
if not execution_pass:
status_parts.append(f"Execution: {r['error']}")
if not schema_pass and return_schema:
status_parts.append(f"Schema: {r.get('schema_error', 'Invalid')}")
status_parts.append(
f"Schema: {r.get('schema_error', 'Invalid')}"
)
status_msg = " | ".join(status_parts) if status_parts else "FAIL"
print(f" {status_icon} Example {r['example_idx']+1}: {status_msg}")

print(f" {status_icon} Example {r['example_idx'] + 1}: {status_msg}")

# Show schema validation details if failed
if execution_pass and not schema_pass and r.get("schema_error"):
print(f" └─ Schema error: {r['schema_error']}")

group_results.append(
{"tool_name": tool_name, "results": results}
)
group_results.append({"tool_name": tool_name, "results": results})

all_results[tool_group] = group_results

Expand All @@ -251,14 +269,16 @@ def main():
print(f" Failed: {total_tests - total_passed}")
if total_tests > 0:
print(f" Success rate: {100 * total_passed / total_tests:.1f}%")

if total_schema_tests > 0:
print(f"\n📋 Schema Validation:")
print(f" Schema tests: {total_schema_tests}")
print(f" Schema passed: {total_schema_passed}")
print(f" Schema failed: {total_schema_tests - total_schema_passed}")
if total_schema_tests > 0:
print(f" Schema validation rate: {100 * total_schema_passed / total_schema_tests:.1f}%")
print(
f" Schema validation rate: {100 * total_schema_passed / total_schema_tests:.1f}%"
)

# Exit with error if any tests failed
if total_passed < total_tests:
Expand All @@ -270,4 +290,3 @@ def main():

if __name__ == "__main__":
main()

Loading