mims-harvard · jgbradley1 · Nov 25, 2025
diff --git a/scripts/build_tools.py b/scripts/build_tools.py
@@ -1,14 +1,16 @@
 #!/usr/bin/env python3
 """Build ToolUniverse tools."""
+
 import sys
 import argparse
 from pathlib import Path
 
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 
+
 def main():
     from tooluniverse.generate_tools import main as generate
-    
+
     parser = argparse.ArgumentParser(
         description="Build ToolUniverse tools",
         formatter_class=argparse.RawDescriptionHelpFormatter,
@@ -18,15 +20,16 @@ def main():
   python scripts/build_tools.py --force      # Force regenerate all tools
   python scripts/build_tools.py --verbose    # Show detailed change information
   python scripts/build_tools.py --force -v   # Force rebuild with verbose output
-        """
+        """,
     )
     parser.add_argument(
         "--force",
         action="store_true",
         help="Force regeneration of all tools regardless of changes detected",
     )
     parser.add_argument(
-        "--verbose", "-v",
+        "--verbose",
+        "-v",
         action="store_true",
         help="Print detailed change information for each tool",
     )
@@ -35,17 +38,18 @@ def main():
         action="store_true",
         help="Skip formatting generated files",
     )
-    
+
     args = parser.parse_args()
-    
+
     print("🔧 Building ToolUniverse tools...")
     generate(
         format_enabled=not args.no_format,
         force_regenerate=args.force,
-        verbose=args.verbose
+        verbose=args.verbose,
     )
     print("✅ Build complete!")
     return 0
 
+
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/scripts/check_tool_name_lengths.py b/scripts/check_tool_name_lengths.py
@@ -79,5 +79,3 @@ def main(argv: List[str] | None = None) -> int:
 
 if __name__ == "__main__":
     sys.exit(main())
-
-
diff --git a/scripts/test_new_tools.py b/scripts/test_new_tools.py
@@ -9,6 +9,7 @@
 4. Validates return results against return_schema
 5. Reports success/failure
 """
+
 import json
 import os
 import sys
@@ -23,6 +24,7 @@
 try:
     import jsonschema
     from jsonschema import validate, ValidationError
+
     JSONSCHEMA_AVAILABLE = True
 except ImportError:
     JSONSCHEMA_AVAILABLE = False
@@ -35,31 +37,37 @@ def load_config_from_file(config_path: str) -> list:
         return json.load(f)
 
 
-def validate_against_schema(data: Any, schema: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
+def validate_against_schema(
+    data: Any, schema: Dict[str, Any]
+) -> Tuple[bool, Optional[str]]:
     """
     Validate data against JSON schema.
-    
+
     Args:
         data: Data to validate
         schema: JSON schema to validate against
-        
+
     Returns:
         Tuple of (is_valid, error_message)
     """
     if not JSONSCHEMA_AVAILABLE:
         return True, None  # Skip validation if jsonschema not available
-        
+
     if not schema:
         return True, None  # No schema to validate against
-        
+
     try:
         validate(instance=data, schema=schema)
         return True, None
     except ValidationError as e:
-        error_path = " -> ".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
+        error_path = (
+            " -> ".join(str(p) for p in e.absolute_path) if e.absolute_path else "root"
+        )
         error_msg = f"Schema validation failed at '{error_path}': {e.message}"
         if e.context:
-            error_msg += f"\n  Context: {', '.join(str(c.message) for c in e.context[:3])}"
+            error_msg += (
+                f"\n  Context: {', '.join(str(c.message) for c in e.context[:3])}"
+            )
         return False, error_msg
     except Exception as e:
         return False, f"Schema validation error: {str(e)}"
@@ -68,44 +76,46 @@ def validate_against_schema(data: Any, schema: Dict[str, Any]) -> Tuple[bool, Op
 def extract_result_data(result: Dict[str, Any]) -> Any:
     """
     Extract the actual data from ToolUniverse result format.
-    
+
     ToolUniverse may return results in different formats:
     - {"success": True, "data": {...}}
     - {"success": True, ...}  (direct data)
     - The result itself if it's not a dict
     """
     if not isinstance(result, dict):
         return result
-        
+
     if result.get("success") is False:
         return None  # Error case, no data to validate
-        
+
     # Try to extract data field
     if "data" in result:
         return result["data"]
-    
+
     # If no "data" field, return the whole result (minus success/error fields)
-    return {k: v for k, v in result.items() if k not in ["success", "error", "error_details"]}
+    return {
+        k: v
+        for k, v in result.items()
+        if k not in ["success", "error", "error_details"]
+    }
 
 
 def test_tool_with_examples(
-    tu: ToolUniverse, 
-    tool_name: str, 
-    examples: list, 
-    return_schema: Optional[Dict[str, Any]] = None
+    tu: ToolUniverse,
+    tool_name: str,
+    examples: list,
+    return_schema: Optional[Dict[str, Any]] = None,
 ):
     """Test a tool with its test examples and validate against return_schema."""
     results = []
     for idx, example in enumerate(examples):
         try:
-            result = tu.run_one_function(
-                {"name": tool_name, "arguments": example}
-            )
+            result = tu.run_one_function({"name": tool_name, "arguments": example})
             success = isinstance(result, dict) and result.get("success", False)
-            
+
             schema_valid = True
             schema_error = None
-            
+
             if success and return_schema:
                 # Extract actual data from result
                 result_data = extract_result_data(result)
@@ -114,7 +124,7 @@ def test_tool_with_examples(
                     # wrap it appropriately or validate the inner data.data structure
                     schema_to_validate = return_schema
                     data_to_validate = result_data
-                    
+
                     # Check if schema expects status/url at root but we only have data
                     schema_root_props = return_schema.get("properties", {})
                     if "status" in schema_root_props and "data" in schema_root_props:
@@ -128,12 +138,14 @@ def test_tool_with_examples(
                         else:
                             # Wrap in expected structure (make status/url optional in validation)
                             pass  # Try validating as-is first
-
-                    schema_valid, schema_error = validate_against_schema(data_to_validate, schema_to_validate)
+
+                    schema_valid, schema_error = validate_against_schema(
+                        data_to_validate, schema_to_validate
+                    )
                 else:
                     schema_valid = False
                     schema_error = "No data returned to validate"
-            
+
             results.append(
                 {
                     "example_idx": idx,
@@ -202,21 +214,27 @@ def main():
                 print(f"⚠️  {tool_name}: No test_examples found")
                 continue
 
-            schema_info = " (with schema validation)" if return_schema else " (no return_schema)"
-            print(f"\n🧪 Testing {tool_name} ({len(test_examples)} examples){schema_info}...")
-            results = test_tool_with_examples(tu, tool_name, test_examples, return_schema)
+            schema_info = (
+                " (with schema validation)" if return_schema else " (no return_schema)"
+            )
+            print(
+                f"\n🧪 Testing {tool_name} ({len(test_examples)} examples){schema_info}..."
+            )
+            results = test_tool_with_examples(
+                tu, tool_name, test_examples, return_schema
+            )
 
             for r in results:
                 total_tests += 1
                 execution_pass = r["success"]
                 schema_pass = r.get("schema_valid", True)
-                
+
                 # Track schema validation separately
                 if return_schema and execution_pass:
                     total_schema_tests += 1
                     if schema_pass:
                         total_schema_passed += 1
-                
+
                 if execution_pass and schema_pass:
                     total_passed += 1
                     status_icon = "✅"
@@ -227,18 +245,18 @@ def main():
                     if not execution_pass:
                         status_parts.append(f"Execution: {r['error']}")
                     if not schema_pass and return_schema:
-                        status_parts.append(f"Schema: {r.get('schema_error', 'Invalid')}")
+                        status_parts.append(
+                            f"Schema: {r.get('schema_error', 'Invalid')}"
+                        )
                     status_msg = " | ".join(status_parts) if status_parts else "FAIL"
-                
-                print(f"  {status_icon} Example {r['example_idx']+1}: {status_msg}")
-                
+
+                print(f"  {status_icon} Example {r['example_idx'] + 1}: {status_msg}")
+
                 # Show schema validation details if failed
                 if execution_pass and not schema_pass and r.get("schema_error"):
                     print(f"      └─ Schema error: {r['schema_error']}")
 
-            group_results.append(
-                {"tool_name": tool_name, "results": results}
-            )
+            group_results.append({"tool_name": tool_name, "results": results})
 
         all_results[tool_group] = group_results
 
@@ -251,14 +269,16 @@ def main():
     print(f"  Failed: {total_tests - total_passed}")
     if total_tests > 0:
         print(f"  Success rate: {100 * total_passed / total_tests:.1f}%")
-    
+
     if total_schema_tests > 0:
         print(f"\n📋 Schema Validation:")
         print(f"  Schema tests: {total_schema_tests}")
         print(f"  Schema passed: {total_schema_passed}")
         print(f"  Schema failed: {total_schema_tests - total_schema_passed}")
         if total_schema_tests > 0:
-            print(f"  Schema validation rate: {100 * total_schema_passed / total_schema_tests:.1f}%")
+            print(
+                f"  Schema validation rate: {100 * total_schema_passed / total_schema_tests:.1f}%"
+            )
 
     # Exit with error if any tests failed
     if total_passed < total_tests:
@@ -270,4 +290,3 @@ def main():
 
 if __name__ == "__main__":
     main()
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -79,5 +79,3 @@ def main(argv: List[str] \| None = None) -> int:

		if __name__ == "__main__":
		sys.exit(main())