From 096089d9161b148da54c8c7324ceddbfd700f696 Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Tue, 15 Jul 2025 14:04:54 +0200 Subject: [PATCH 1/9] open-api-evals-script --- scripts/generate_openapi.py | 621 ++++++++++++++++++++++++++++++++++++ 1 file changed, 621 insertions(+) create mode 100644 scripts/generate_openapi.py diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py new file mode 100644 index 0000000..0949ee1 --- /dev/null +++ b/scripts/generate_openapi.py @@ -0,0 +1,621 @@ +#!/usr/bin/env python3 +""" +Python script to generate OpenAPI JSON from evaluators.generated.ts +""" + +import json +import re +import sys +from pathlib import Path +from typing import Dict, Any, List, Optional + + +def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: + """ + Parse the TypeScript evaluators file to extract evaluator definitions. + """ + with open(file_path, "r", encoding="utf-8") as f: + content = f.read() + + evaluators = {} + + # Find all evaluator definitions using regex + # Look for patterns like "evaluator/id": { ... } + evaluator_pattern = r'"([^"]+)":\s*{' + + # Find all matches + matches = list(re.finditer(evaluator_pattern, content)) + print(f"Found {len(matches)} evaluator matches") + + for i, match in enumerate(matches): + evaluator_id = match.group(1) + start_pos = match.end() - 1 # Position of the opening brace + + # Find the matching closing brace + brace_count = 0 + end_pos = start_pos + + for j, char in enumerate(content[start_pos:], start_pos): + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + end_pos = j + 1 + break + + evaluator_content = content[start_pos:end_pos] + + # Parse the evaluator content + evaluator = parse_evaluator_content(evaluator_id, evaluator_content) + evaluators[evaluator_id] = evaluator + + print(f"Processing evaluator: {evaluator_id}") + + return evaluators + + +def parse_evaluator_content(evaluator_id: str, content: str) -> Dict[str, Any]: + """Parse individual evaluator content.""" + + evaluator = { + "name": evaluator_id, + "description": "", + "category": "other", + "isGuardrail": False, + "requiredFields": [], + "optionalFields": [], + "settings": {}, + } + + # Extract name + name_match = re.search(r"name:\s*`([^`]+)`", content, re.DOTALL) + if name_match: + evaluator["name"] = name_match.group(1).strip() + + # Extract description + desc_match = re.search(r"description:\s*`([^`]+)`", content, re.DOTALL) + if desc_match: + evaluator["description"] = desc_match.group(1).strip() + + # Extract category + cat_match = re.search(r'category:\s*"([^"]+)"', content) + if cat_match: + evaluator["category"] = cat_match.group(1) + + # Extract isGuardrail + guard_match = re.search(r"isGuardrail:\s*(true|false)", content) + if guard_match: + evaluator["isGuardrail"] = guard_match.group(1) == "true" + + # Extract required fields + req_match = re.search(r"requiredFields:\s*\[([^\]]*)\]", content) + if req_match: + req_content = req_match.group(1) + evaluator["requiredFields"] = [ + field.strip().strip('"') for field in re.findall(r'"([^"]+)"', req_content) + ] + + # Extract optional fields + opt_match = re.search(r"optionalFields:\s*\[([^\]]*)\]", content) + if opt_match: + opt_content = opt_match.group(1) + evaluator["optionalFields"] = [ + field.strip().strip('"') for field in re.findall(r'"([^"]+)"', opt_content) + ] + + # Extract settings + settings_match = re.search( + r"settings:\s*{([^}]+(?:{[^}]*}[^}]*)*)}", content, re.DOTALL + ) + if settings_match: + settings_content = 
settings_match.group(1) + evaluator["settings"] = parse_settings(settings_content) + + return evaluator + + +def parse_settings(settings_content: str) -> Dict[str, Any]: + """Parse settings object.""" + settings = {} + + # Find all setting definitions + setting_pattern = r"(\w+):\s*{([^}]+(?:{[^}]*}[^}]*)*)}" + + for setting_match in re.finditer(setting_pattern, settings_content, re.DOTALL): + setting_name = setting_match.group(1) + setting_content = setting_match.group(2) + + # Extract description + desc_match = re.search(r'description:\s*"([^"]+)"', setting_content) + description = desc_match.group(1) if desc_match else f"{setting_name} setting" + + # Extract default value + default_value = extract_default_value(setting_content) + + settings[setting_name] = {"description": description, "default": default_value} + + return settings + + +def extract_default_value(content: str) -> Any: + """Extract default value from setting content.""" + + # Look for default: followed by various value types + default_match = re.search(r"default:\s*(.+?)(?:,|$)", content, re.DOTALL) + if not default_match: + return None + + value_str = default_match.group(1).strip() + + # Handle different value types + if value_str.startswith('"') and value_str.endswith('"'): + # String value + return value_str[1:-1] + elif value_str.startswith("`") and value_str.endswith("`"): + # Backtick string + return value_str[1:-1] + elif value_str == "true": + return True + elif value_str == "false": + return False + elif value_str.isdigit(): + return int(value_str) + elif value_str.replace(".", "").isdigit(): + return float(value_str) + elif value_str.startswith("[") and value_str.endswith("]"): + # Array value - simplified parsing + return [] + elif value_str.startswith("{") and value_str.endswith("}"): + # Object value - simplified parsing + return {} + else: + return value_str + + +def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: + """Generate OpenAPI 3.1.0 schema from evaluators.""" + + schema = { + "openapi": "3.1.0", + "info": { + "title": "LangEvals API", + "version": "1.0.0", + "description": "API for LangEvals evaluators", + }, + "servers": [ + { + "url": "https://app.langwatch.ai/api/evaluations", + "description": "Production server", + } + ], + "security": [{"api_key": []}], + "paths": {}, + "components": { + "schemas": { + "EvaluationRequest": { + "type": "object", + "properties": { + "data": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "The input text to evaluate", + }, + "output": { + "type": "string", + "description": "The output text to evaluate", + }, + "contexts": { + "type": "array", + "items": {"type": "string"}, + "description": "Context information for evaluation", + }, + "expected_output": { + "type": "string", + "description": "Expected output for comparison", + }, + "expected_contexts": { + "type": "array", + "items": {"type": "string"}, + "description": "Expected context information", + }, + "conversation": { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": ["user", "assistant", "system"], + }, + "content": {"type": "string"}, + }, + "required": ["role", "content"], + }, + "description": "Conversation history", + }, + }, + }, + "settings": { + "type": "object", + "description": "Evaluator settings", + }, + }, + "required": ["data"], + }, + "EvaluationEntry": { + "type": "object", + "properties": { + "input": { + "type": "string", + "description": "The input text to 
evaluate", + }, + "output": { + "type": "string", + "description": "The output text to evaluate", + }, + "contexts": { + "type": "array", + "items": {"type": "string"}, + "description": "Context information for evaluation", + }, + "expected_output": { + "type": "string", + "description": "Expected output for comparison", + }, + "expected_contexts": { + "type": "array", + "items": {"type": "string"}, + "description": "Expected context information", + }, + "conversation": { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": ["user", "assistant", "system"], + }, + "content": {"type": "string"}, + }, + "required": ["role", "content"], + }, + "description": "Conversation history", + }, + }, + }, + "EvaluationResult": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["processed", "skipped", "error"], + }, + "score": {"type": "number", "description": "Evaluation score"}, + "passed": { + "type": "boolean", + "description": "Whether the evaluation passed", + }, + "label": {"type": "string", "description": "Evaluation label"}, + "details": { + "type": "string", + "description": "Additional details about the evaluation", + }, + "cost": {"$ref": "#/components/schemas/Money"}, + "raw_response": { + "type": "object", + "description": "Raw response from the evaluator", + }, + "error_type": { + "type": "string", + "description": "Type of error if status is 'error'", + }, + "traceback": { + "type": "array", + "items": {"type": "string"}, + "description": "Error traceback if status is 'error'", + }, + }, + "required": ["status"], + }, + "Money": { + "type": "object", + "properties": { + "currency": {"type": "string"}, + "amount": {"type": "number"}, + }, + "required": ["currency", "amount"], + }, + }, + "securitySchemes": { + "api_key": {"type": "apiKey", "in": "header", "name": "X-Auth-Token"} + }, + }, + } + + # Generate paths for each evaluator + for evaluator_id, evaluator in evaluators.items(): + path_key = f"/{evaluator_id.replace('.', '/')}/evaluate" + + # Create evaluator-specific data schema + data_schema = {"type": "object", "properties": {}} + + # Add all possible fields to properties + all_fields = [ + "input", + "output", + "contexts", + "expected_output", + "expected_contexts", + "conversation", + ] + required_fields = [] + + for field in all_fields: + if field in evaluator.get("requiredFields", []): + required_fields.append(field) + + if field in evaluator.get("requiredFields", []) or field in evaluator.get( + "optionalFields", [] + ): + if field == "input": + data_schema["properties"]["input"] = { + "type": "string", + "description": "The input text to evaluate", + } + elif field == "output": + data_schema["properties"]["output"] = { + "type": "string", + "description": "The output text to evaluate", + } + elif field == "contexts": + data_schema["properties"]["contexts"] = { + "type": "array", + "items": {"type": "string"}, + "description": "Context information for evaluation", + } + elif field == "expected_output": + data_schema["properties"]["expected_output"] = { + "type": "string", + "description": "Expected output for comparison", + } + elif field == "expected_contexts": + data_schema["properties"]["expected_contexts"] = { + "type": "array", + "items": {"type": "string"}, + "description": "Expected context information", + } + elif field == "conversation": + data_schema["properties"]["conversation"] = { + "type": "array", + "items": { + "type": "object", + "properties": { + "role": { + "type": 
"string", + "enum": ["user", "assistant", "system"], + }, + "content": {"type": "string"}, + }, + "required": ["role", "content"], + }, + "description": "Conversation history", + } + + # Add required fields if any + if required_fields: + data_schema["required"] = required_fields + + # Create evaluator-specific request schema + request_schema_name = ( + f"{evaluator_id.replace('.', '_').replace('/', '_')}Request" + ) + schema["components"]["schemas"][request_schema_name] = { + "type": "object", + "properties": {"data": data_schema}, + "required": ["data"], + } + + # Create settings schema for this evaluator + settings_schema = {"type": "object", "properties": {}} + + # Only process settings if the evaluator has settings + if evaluator.get("settings"): + for setting_key, setting in evaluator["settings"].items(): + property_def = { + "description": setting.get("description", f"{setting_key} setting") + } + + default_value = setting.get("default") + if isinstance(default_value, str): + property_def["type"] = "string" + property_def["default"] = default_value + elif isinstance(default_value, (int, float)): + property_def["type"] = "number" + property_def["default"] = default_value + elif isinstance(default_value, bool): + property_def["type"] = "boolean" + property_def["default"] = default_value + elif isinstance(default_value, list): + property_def["type"] = "array" + if default_value and isinstance(default_value[0], str): + property_def["items"] = {"type": "string"} + else: + property_def["items"] = {"type": "object"} + property_def["default"] = default_value + elif isinstance(default_value, dict): + property_def["type"] = "object" + property_def["default"] = default_value + + settings_schema["properties"][setting_key] = property_def + + # Add settings schema to components only if there are settings + if settings_schema["properties"]: + settings_schema_name = ( + f"{evaluator_id.replace('.', '_').replace('/', '_')}Settings" + ) + schema["components"]["schemas"][settings_schema_name] = settings_schema + + # Create the path with settings + schema["paths"][path_key] = { + "post": { + "summary": evaluator["name"], + "description": evaluator["description"], + "operationId": f"{evaluator_id.replace('.', '_').replace('/', '_')}_evaluate", + "requestBody": { + "content": { + "application/json": { + "schema": { + "allOf": [ + { + "$ref": f"#/components/schemas/{request_schema_name}" + }, + { + "type": "object", + "properties": { + "settings": { + "$ref": f"#/components/schemas/{settings_schema_name}" + } + }, + }, + ] + } + } + }, + "required": True, + }, + "responses": { + "200": { + "description": "Successful evaluation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EvaluationResult" + }, + } + } + }, + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"detail": {"type": "string"}}, + } + } + }, + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"detail": {"type": "string"}}, + } + } + }, + }, + }, + } + } + else: + # Create the path without settings for evaluators with no settings + schema["paths"][path_key] = { + "post": { + "summary": evaluator["name"], + "description": evaluator["description"], + "operationId": f"{evaluator_id.replace('.', '_').replace('/', '_')}_evaluate", + "requestBody": { + "content": { + "application/json": { + "schema": { 
+ "$ref": f"#/components/schemas/{request_schema_name}" + } + } + }, + "required": True, + }, + "responses": { + "200": { + "description": "Successful evaluation", + "content": { + "application/json": { + "schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/EvaluationResult" + }, + } + } + }, + }, + "400": { + "description": "Bad request", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"detail": {"type": "string"}}, + } + } + }, + }, + "500": { + "description": "Internal server error", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": {"detail": {"type": "string"}}, + } + } + }, + }, + }, + } + } + + return schema + + +def main(): + """Main function to generate OpenAPI schema.""" + try: + # Find the evaluators.generated.ts file + script_dir = Path(__file__).parent + project_root = script_dir.parent + evaluators_file = project_root / "ts-integration" / "evaluators.generated.ts" + + if not evaluators_file.exists(): + print(f"Error: Could not find evaluators file at {evaluators_file}") + sys.exit(1) + + print(f"Reading evaluators from: {evaluators_file}") + evaluators = parse_typescript_evaluators(str(evaluators_file)) + + print(f"Found {len(evaluators)} evaluators") + schema = generate_openapi_schema(evaluators) + + # Write the schema to a file + output_path = script_dir / "openapi.json" + with open(output_path, "w", encoding="utf-8") as f: + json.dump(schema, f, indent=2, ensure_ascii=False) + + print(f"OpenAPI schema generated successfully at: {output_path}") + print(f"Generated {len(evaluators)} evaluator endpoints") + + except Exception as error: + print(f"Error generating OpenAPI schema: {error}") + sys.exit(1) + + +if __name__ == "__main__": + main() From 5fb265c7fbc41cf20ebbfa775a819be1951e419b Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Wed, 16 Jul 2025 09:01:08 +0200 Subject: [PATCH 2/9] Update script --- scripts/generate_openapi.py | 189 ++++++++++++++++++++++++++++++++---- 1 file changed, 168 insertions(+), 21 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index 0949ee1..f79176a 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -19,12 +19,38 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: evaluators = {} + # Find the AVAILABLE_EVALUATORS object using a more robust approach + # First find the start of the object + start_pattern = r"export const AVAILABLE_EVALUATORS:\s*{[^}]*}\s*=\s*{" + start_match = re.search(start_pattern, content) + + if not start_match: + print("Could not find AVAILABLE_EVALUATORS object") + return evaluators + + start_pos = start_match.end() - 1 # Position of the opening brace + + # Find the matching closing brace + brace_count = 0 + end_pos = start_pos + + for j, char in enumerate(content[start_pos:], start_pos): + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + end_pos = j + 1 + break + + evaluators_content = content[start_pos:end_pos] + # Find all evaluator definitions using regex # Look for patterns like "evaluator/id": { ... 
} evaluator_pattern = r'"([^"]+)":\s*{' # Find all matches - matches = list(re.finditer(evaluator_pattern, content)) + matches = list(re.finditer(evaluator_pattern, evaluators_content)) print(f"Found {len(matches)} evaluator matches") for i, match in enumerate(matches): @@ -35,7 +61,7 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: brace_count = 0 end_pos = start_pos - for j, char in enumerate(content[start_pos:], start_pos): + for j, char in enumerate(evaluators_content[start_pos:], start_pos): if char == "{": brace_count += 1 elif char == "}": @@ -44,7 +70,7 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: end_pos = j + 1 break - evaluator_content = content[start_pos:end_pos] + evaluator_content = evaluators_content[start_pos:end_pos] # Parse the evaluator content evaluator = parse_evaluator_content(evaluator_id, evaluator_content) @@ -52,6 +78,10 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: print(f"Processing evaluator: {evaluator_id}") + # Debug: Print content for competitor_blocklist + if evaluator_id == "langevals/competitor_blocklist": + print(f"DEBUG: Content for {evaluator_id}: {evaluator_content[:500]}") + return evaluators @@ -104,13 +134,31 @@ def parse_evaluator_content(evaluator_id: str, content: str) -> Dict[str, Any]: field.strip().strip('"') for field in re.findall(r'"([^"]+)"', opt_content) ] - # Extract settings - settings_match = re.search( - r"settings:\s*{([^}]+(?:{[^}]*}[^}]*)*)}", content, re.DOTALL - ) - if settings_match: - settings_content = settings_match.group(1) - evaluator["settings"] = parse_settings(settings_content) + # Extract settings using a more robust approach + settings_start = content.find("settings:") + if settings_start != -1: + # Find the opening brace after settings: + brace_start = content.find("{", settings_start) + if brace_start != -1: + # Find the matching closing brace + brace_count = 0 + settings_end = brace_start + + for i, char in enumerate(content[brace_start:], brace_start): + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + settings_end = i + 1 + break + + settings_content = content[brace_start + 1 : settings_end - 1] + evaluator["settings"] = parse_settings(settings_content) + else: + print(f"DEBUG: No opening brace found for settings in {evaluator_id}") + else: + print(f"DEBUG: No settings found for {evaluator_id}") return evaluator @@ -119,12 +167,28 @@ def parse_settings(settings_content: str) -> Dict[str, Any]: """Parse settings object.""" settings = {} - # Find all setting definitions - setting_pattern = r"(\w+):\s*{([^}]+(?:{[^}]*}[^}]*)*)}" + # Find all setting definitions using a more robust approach + # Look for patterns like "setting_name: { ... 
}" with proper brace matching + setting_pattern = r"(\w+):\s*{" for setting_match in re.finditer(setting_pattern, settings_content, re.DOTALL): setting_name = setting_match.group(1) - setting_content = setting_match.group(2) + start_pos = setting_match.end() - 1 # Position of the opening brace + + # Find the matching closing brace + brace_count = 0 + end_pos = start_pos + + for i, char in enumerate(settings_content[start_pos:], start_pos): + if char == "{": + brace_count += 1 + elif char == "}": + brace_count -= 1 + if brace_count == 0: + end_pos = i + 1 + break + + setting_content = settings_content[start_pos + 1 : end_pos - 1] # Extract description desc_match = re.search(r'description:\s*"([^"]+)"', setting_content) @@ -133,6 +197,15 @@ def parse_settings(settings_content: str) -> Dict[str, Any]: # Extract default value default_value = extract_default_value(setting_content) + # Debug output for competitor_blocklist + if setting_name == "competitors": + print(f"DEBUG: Setting content for competitors: {setting_content}") + print(f"DEBUG: Extracted default value: {default_value}") + print(f"DEBUG: Type of default value: {type(default_value)}") + print( + f"DEBUG: Content contains 'competitors': {'competitors' in setting_content}" + ) + settings[setting_name] = {"description": description, "default": default_value} return settings @@ -141,13 +214,58 @@ def parse_settings(settings_content: str) -> Dict[str, Any]: def extract_default_value(content: str) -> Any: """Extract default value from setting content.""" + # Debug output (only for competitors field) + if "competitors" in content: + print(f"DEBUG: extract_default_value called with content: {content}") + # Look for default: followed by various value types + # Use a more robust pattern that handles arrays and objects default_match = re.search(r"default:\s*(.+?)(?:,|$)", content, re.DOTALL) if not default_match: return None value_str = default_match.group(1).strip() + # Debug output (only for competitors field) + if "competitors" in content: + print(f"DEBUG: Raw regex match: {default_match.group(0)}") + print(f"DEBUG: Captured value: {value_str}") + + # If it's an array or object, try to find the complete value + if value_str.startswith("[") or value_str.startswith("{"): + # Find the complete array/object by looking in the original content + # Find the position of the opening bracket in the original content + default_start = content.find("default:") + if default_start != -1: + # Find the opening bracket after "default:" + bracket_start = content.find("[", default_start) + if bracket_start != -1: + # Find the matching closing bracket + bracket_count = 0 + complete_value = "" + + for i, char in enumerate(content[bracket_start:], bracket_start): + complete_value += char + if char == "[": + bracket_count += 1 + elif char == "]": + bracket_count -= 1 + if bracket_count == 0: + break + + value_str = complete_value + + # Debug output (only for competitors field) + if "competitors" in content: + print(f"DEBUG: Original value_str: {default_match.group(1).strip()}") + print(f"DEBUG: Complete value_str: {value_str}") + else: + # Debug output (only for competitors field) + if "competitors" in content: + print(f"DEBUG: Not an array/object, value_str: {value_str}") + print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}") + print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}") + # Handle different value types if value_str.startswith('"') and value_str.endswith('"'): # String value @@ -164,11 +282,17 @@ def 
extract_default_value(content: str) -> Any: elif value_str.replace(".", "").isdigit(): return float(value_str) elif value_str.startswith("[") and value_str.endswith("]"): - # Array value - simplified parsing - return [] + # Array value - try to parse as JSON + try: + return json.loads(value_str) + except: + return [] elif value_str.startswith("{") and value_str.endswith("}"): - # Object value - simplified parsing - return {} + # Object value - try to parse as JSON + try: + return json.loads(value_str) + except: + return {} else: return value_str @@ -413,12 +537,24 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: request_schema_name = ( f"{evaluator_id.replace('.', '_').replace('/', '_')}Request" ) - schema["components"]["schemas"][request_schema_name] = { + request_schema = { "type": "object", "properties": {"data": data_schema}, "required": ["data"], } + # Add settings property to request schema if evaluator has settings + if evaluator.get("settings"): + print(f"Adding settings to {evaluator_id}") + request_schema["properties"]["settings"] = { + "type": "object", + "description": "Evaluator settings", + } + else: + print(f"No settings found for {evaluator_id}") + + schema["components"]["schemas"][request_schema_name] = request_schema + # Create settings schema for this evaluator settings_schema = {"type": "object", "properties": {}} @@ -441,10 +577,21 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: property_def["default"] = default_value elif isinstance(default_value, list): property_def["type"] = "array" - if default_value and isinstance(default_value[0], str): - property_def["items"] = {"type": "string"} + # Infer item type for arrays + if default_value: + first_item = default_value[0] + if isinstance(first_item, str): + property_def["items"] = {"type": "string"} + elif isinstance(first_item, bool): + property_def["items"] = {"type": "boolean"} + elif isinstance(first_item, (int, float)): + property_def["items"] = {"type": "number"} + elif isinstance(first_item, dict): + property_def["items"] = {"type": "object"} + else: + property_def["items"] = {"type": "string"} else: - property_def["items"] = {"type": "object"} + property_def["items"] = {"type": "string"} property_def["default"] = default_value elif isinstance(default_value, dict): property_def["type"] = "object" From 015607e1f849261e9fe53e22b42c6c623cf7e8ae Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Wed, 16 Jul 2025 09:46:13 +0200 Subject: [PATCH 3/9] Update generate_openapi.py --- scripts/generate_openapi.py | 70 +++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index f79176a..0c3e045 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -211,6 +211,21 @@ def parse_settings(settings_content: str) -> Dict[str, Any]: return settings +def typescript_to_json(ts_str: str) -> str: + """Convert a TypeScript-style object/array string to valid JSON.""" + # Remove newlines and excessive spaces for easier regex + s = ts_str.strip() + # Remove trailing commas before closing brackets/braces + s = re.sub(r",\s*([}\]])", r"\1", s) + # Quote property names (unquoted keys) + s = re.sub(r"([,{]\s*)([A-Za-z0-9_]+)\s*:", r'\1"\2":', s) + # Quote string values (only those not already quoted) + s = re.sub(r":\s*([A-Za-z0-9_\-/\.]+)", r': "\1"', s) + # Replace single quotes with double quotes + s = s.replace("'", '"') + return s + + def 
extract_default_value(content: str) -> Any: """Extract default value from setting content.""" @@ -218,6 +233,12 @@ def extract_default_value(content: str) -> Any: if "competitors" in content: print(f"DEBUG: extract_default_value called with content: {content}") + # Debug output for categories field + if "categories" in content: + print( + f"DEBUG: extract_default_value called with content for categories: {content}" + ) + # Look for default: followed by various value types # Use a more robust pattern that handles arrays and objects default_match = re.search(r"default:\s*(.+?)(?:,|$)", content, re.DOTALL) @@ -237,18 +258,24 @@ def extract_default_value(content: str) -> Any: # Find the position of the opening bracket in the original content default_start = content.find("default:") if default_start != -1: - # Find the opening bracket after "default:" - bracket_start = content.find("[", default_start) + bracket_start = -1 + if value_str.startswith("["): + bracket_start = content.find("[", default_start) + elif value_str.startswith("{"): + bracket_start = content.find("{", default_start) + if bracket_start != -1: # Find the matching closing bracket bracket_count = 0 complete_value = "" + open_char = content[bracket_start] + close_char = "]" if open_char == "[" else "}" for i, char in enumerate(content[bracket_start:], bracket_start): complete_value += char - if char == "[": + if char == open_char: bracket_count += 1 - elif char == "]": + elif char == close_char: bracket_count -= 1 if bracket_count == 0: break @@ -259,6 +286,13 @@ def extract_default_value(content: str) -> Any: if "competitors" in content: print(f"DEBUG: Original value_str: {default_match.group(1).strip()}") print(f"DEBUG: Complete value_str: {value_str}") + + # Debug output for categories field + if "categories" in content: + print( + f"DEBUG: Original value_str for categories: {default_match.group(1).strip()}" + ) + print(f"DEBUG: Complete value_str for categories: {value_str}") else: # Debug output (only for competitors field) if "competitors" in content: @@ -266,6 +300,12 @@ def extract_default_value(content: str) -> Any: print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}") print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}") + # Debug output for categories field + if "categories" in content: + print(f"DEBUG: Not an array/object, value_str for categories: {value_str}") + print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}") + print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}") + # Handle different value types if value_str.startswith('"') and value_str.endswith('"'): # String value @@ -286,13 +326,23 @@ def extract_default_value(content: str) -> Any: try: return json.loads(value_str) except: - return [] + # Try to convert TypeScript array to JSON + try: + json_str = typescript_to_json(value_str) + return json.loads(json_str) + except: + return [] elif value_str.startswith("{") and value_str.endswith("}"): # Object value - try to parse as JSON try: return json.loads(value_str) except: - return {} + # Try to convert TypeScript object to JSON + try: + json_str = typescript_to_json(value_str) + return json.loads(json_str) + except: + return {} else: return value_str @@ -566,15 +616,15 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: } default_value = setting.get("default") - if isinstance(default_value, str): + if isinstance(default_value, bool): + property_def["type"] = "boolean" + property_def["default"] = default_value + 
elif isinstance(default_value, str): property_def["type"] = "string" property_def["default"] = default_value elif isinstance(default_value, (int, float)): property_def["type"] = "number" property_def["default"] = default_value - elif isinstance(default_value, bool): - property_def["type"] = "boolean" - property_def["default"] = default_value elif isinstance(default_value, list): property_def["type"] = "array" # Infer item type for arrays From fe305149b9e966017451e145a31f71771e41071f Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Wed, 16 Jul 2025 10:13:44 +0200 Subject: [PATCH 4/9] remove debugs --- scripts/generate_openapi.py | 54 +------------------------------------ 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index 0c3e045..3f9a944 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -78,10 +78,6 @@ def parse_typescript_evaluators(file_path: str) -> Dict[str, Any]: print(f"Processing evaluator: {evaluator_id}") - # Debug: Print content for competitor_blocklist - if evaluator_id == "langevals/competitor_blocklist": - print(f"DEBUG: Content for {evaluator_id}: {evaluator_content[:500]}") - return evaluators @@ -197,15 +193,6 @@ def parse_settings(settings_content: str) -> Dict[str, Any]: # Extract default value default_value = extract_default_value(setting_content) - # Debug output for competitor_blocklist - if setting_name == "competitors": - print(f"DEBUG: Setting content for competitors: {setting_content}") - print(f"DEBUG: Extracted default value: {default_value}") - print(f"DEBUG: Type of default value: {type(default_value)}") - print( - f"DEBUG: Content contains 'competitors': {'competitors' in setting_content}" - ) - settings[setting_name] = {"description": description, "default": default_value} return settings @@ -229,16 +216,6 @@ def typescript_to_json(ts_str: str) -> str: def extract_default_value(content: str) -> Any: """Extract default value from setting content.""" - # Debug output (only for competitors field) - if "competitors" in content: - print(f"DEBUG: extract_default_value called with content: {content}") - - # Debug output for categories field - if "categories" in content: - print( - f"DEBUG: extract_default_value called with content for categories: {content}" - ) - # Look for default: followed by various value types # Use a more robust pattern that handles arrays and objects default_match = re.search(r"default:\s*(.+?)(?:,|$)", content, re.DOTALL) @@ -247,12 +224,7 @@ def extract_default_value(content: str) -> Any: value_str = default_match.group(1).strip() - # Debug output (only for competitors field) - if "competitors" in content: - print(f"DEBUG: Raw regex match: {default_match.group(0)}") - print(f"DEBUG: Captured value: {value_str}") - - # If it's an array or object, try to find the complete value + # If it's an array or object, try to find the complete value if value_str.startswith("[") or value_str.startswith("{"): # Find the complete array/object by looking in the original content # Find the position of the opening bracket in the original content @@ -282,30 +254,6 @@ def extract_default_value(content: str) -> Any: value_str = complete_value - # Debug output (only for competitors field) - if "competitors" in content: - print(f"DEBUG: Original value_str: {default_match.group(1).strip()}") - print(f"DEBUG: Complete value_str: {value_str}") - - # Debug output for categories field - if "categories" in content: - print( - f"DEBUG: Original value_str for categories: 
{default_match.group(1).strip()}" - ) - print(f"DEBUG: Complete value_str for categories: {value_str}") - else: - # Debug output (only for competitors field) - if "competitors" in content: - print(f"DEBUG: Not an array/object, value_str: {value_str}") - print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}") - print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}") - - # Debug output for categories field - if "categories" in content: - print(f"DEBUG: Not an array/object, value_str for categories: {value_str}") - print(f"DEBUG: value_str starts with '[': {value_str.startswith('[')}") - print(f"DEBUG: value_str starts with '{{': {value_str.startswith('{')}") - # Handle different value types if value_str.startswith('"') and value_str.endswith('"'): # String value From 45ad0637a03dcc1758f167007cfa1afce19e2a48 Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Thu, 17 Jul 2025 10:02:46 +0200 Subject: [PATCH 5/9] add code samples --- scripts/generate_openapi.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index 3f9a944..6b41978 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -226,7 +226,6 @@ def extract_default_value(content: str) -> Any: # If it's an array or object, try to find the complete value if value_str.startswith("[") or value_str.startswith("{"): - # Find the complete array/object by looking in the original content # Find the position of the opening bracket in the original content default_start = content.find("default:") if default_start != -1: @@ -669,6 +668,18 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: }, }, }, + "x-codeSamples": [ + { + "lang": "python", + "label": "Python", + "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... # your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', + }, + { + "lang": "typescript", + "label": "TypeScript", + "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))} settings: {{}},\n }})\n\n console.log(result);', + }, + ], } } else: @@ -725,6 +736,18 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: }, }, }, + "x-codeSamples": [ + { + "lang": "python", + "label": "Python", + "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... 
# your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', + }, + { + "lang": "typescript", + "label": "TypeScript", + "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))} settings: {{}},\n }})\n\n console.log(result);', + }, + ], } } From 394552dcae78efe9157ceecdee85a84da0503251 Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Thu, 17 Jul 2025 11:21:45 +0200 Subject: [PATCH 6/9] add offline --- scripts/generate_openapi.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index 6b41978..969528d 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -671,13 +671,18 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: "x-codeSamples": [ { "lang": "python", - "label": "Python", + "label": "Python (offline)", + "source": f'import langwatch\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n def evaluate(index, row):\n # Your evaluation logic here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{\n \n }}\n )\n\n evaluation.submit(evaluate, index, row)', + }, + { + "lang": "python", + "label": "Python (online)", "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... # your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', }, { "lang": "typescript", "label": "TypeScript", - "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))} settings: {{}},\n }})\n\n console.log(result);', + "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... 
your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings: {{}},\n }})\n\n console.log(result);', }, ], } @@ -739,13 +744,18 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: "x-codeSamples": [ { "lang": "python", - "label": "Python", + "label": "Python (offline)", + "source": f'import langwatch\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n def evaluate(index, row):\n # Your evaluation logic here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{\n \n }}\n )\n\n evaluation.submit(evaluate, index, row)', + }, + { + "lang": "python", + "label": "Python (online)", "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... # your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', }, { "lang": "typescript", "label": "TypeScript", - "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))} settings: {{}},\n }})\n\n console.log(result);', + "source": f'import {{ type LangWatchTrace }} from "langwatch";\n\nasync function llmStep({{ message, trace }}: {{ message: string, trace: LangWatchTrace }}): Promise {{\n const span = trace.startLLMSpan({{ name: "llmStep" }});\n \n // ... 
your existing code\n\n // call the evaluator either on a span or on a trace\n const result = await span.evaluate({{\n evaluator: "{evaluator_id}",\n name: "",\n{chr(10).join(f" {field}: \"\", // required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}: \"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings: {{}},\n }})\n\n console.log(result);', }, ], } From 9bc8502d9a6e1b77c37f1bd9a0e841200bee70ed Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Thu, 17 Jul 2025 13:18:44 +0200 Subject: [PATCH 7/9] Update generate_openapi.py --- scripts/generate_openapi.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index 969528d..a21d2a2 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -671,13 +671,13 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: "x-codeSamples": [ { "lang": "python", - "label": "Python (offline)", - "source": f'import langwatch\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n def evaluate(index, row):\n # Your evaluation logic here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{\n \n }}\n )\n\n evaluation.submit(evaluate, index, row)', + "label": "Offline Evaluation", + "source": f'import langwatch\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python", - "label": "Python (online)", - "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... # your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', + "label": "Realtime Evaluation", + "source": f'import langwatch\n\n@langwatch.span()\ndef llm_step():\n ... 
# your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n data={{\n{chr(10).join([f" \"{field}\": \"\"," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", [])])}\n }},\n settings={{}},\n )\n print(result)', }, { "lang": "typescript", @@ -744,13 +744,13 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: "x-codeSamples": [ { "lang": "python", - "label": "Python (offline)", - "source": f'import langwatch\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n def evaluate(index, row):\n # Your evaluation logic here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{\n \n }}\n )\n\n evaluation.submit(evaluate, index, row)', + "label": "Offline Evaluation", + "source": f'import langwatch\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python", - "label": "Python (online)", - "source": f'import langwatch\n@langwatch.span()\ndef llm_step():\n ... # your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n{chr(10).join(f" {field}=\"\", # required" for field in evaluator.get("requiredFields", []))}\n{chr(10).join(f" {field}=\"\", # optional" for field in evaluator.get("optionalFields", []))}\n settings={{}},\n )\n print(result)', + "label": "Realtime Evaluation", + "source": f'import langwatch\n\n@langwatch.span()\ndef llm_step():\n ... 
# your existing code\n result = langwatch.get_current_span().evaluate(\n "{evaluator_id}",\n data={{\n{chr(10).join([f" \"{field}\": \"\"," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", [])])}\n }},\n settings={{}},\n )\n print(result)', }, { "lang": "typescript", From 94605f8bc62a01361d0f41dac638d69b1efe2f82 Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Fri, 18 Jul 2025 11:01:57 +0200 Subject: [PATCH 8/9] update spacing --- scripts/generate_openapi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index a21d2a2..a3ef917 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -672,7 +672,7 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: { "lang": "python", "label": "Offline Evaluation", - "source": f'import langwatch\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', + "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n\n # your execution code here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," if field != "expected_output" else f" \"{field}\": row[\"{field}\"]," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python", @@ -745,7 +745,7 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: { "lang": "python", "label": "Offline Evaluation", - "source": f'import langwatch\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', + "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n\n # your execution code here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," if field != "expected_output" else f" \"{field}\": row[\"{field}\"]," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python", From 45da26854b954007ad225c182ee3ffd3b6519135 Mon Sep 17 00:00:00 2001 From: Richard Huth Date: Mon, 21 Jul 2025 11:18:53 +0200 Subject: [PATCH 9/9] generate mdx list and open api --- scripts/generate_mdx_list.py | 202 +++++++++++++++++++++++++++++++++++ scripts/generate_openapi.py | 4 +- 2 files changed, 204 insertions(+), 2 deletions(-) create mode 100644 scripts/generate_mdx_list.py diff --git a/scripts/generate_mdx_list.py b/scripts/generate_mdx_list.py new file mode 100644 index 0000000..8b9867f --- /dev/null +++ b/scripts/generate_mdx_list.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python3 +""" +Python script to generate MDX list from 
OpenAPI JSON +""" + +import json +import sys +from pathlib import Path +from typing import Dict, Any, List + + +def categorize_evaluators( + evaluators: Dict[str, Any], +) -> Dict[str, Dict[str, Any]]: + """Categorize evaluators based on their paths and descriptions.""" + + categories = { + "Expected Answer Evaluation": { + "description": "For when you have the golden answer and want to measure how correct the LLM gets it", + "evaluators": [], + }, + "LLM-as-Judge": { + "description": "For when you don't have a golden answer, but have a set of rules for another LLM to evaluate quality", + "evaluators": [], + }, + "RAG Quality": { + "description": "For measuring the quality of your RAG, check for hallucinations with faithfulness and precision/recall", + "evaluators": [], + }, + "Quality Aspects Evaluation": { + "description": "For when you want to check the language, structure, style and other general quality metrics", + "evaluators": [], + }, + "Safety": { + "description": "Check for PII, prompt injection attempts and toxic content", + "evaluators": [], + }, + "Other": {"description": "Miscellaneous evaluators", "evaluators": []}, + } + + for path, path_info in evaluators.items(): + if not path.endswith("/evaluate"): + continue + + evaluator_id = path.replace("/evaluate", "") + post_info = path_info.get("post", {}) + summary = post_info.get("summary", evaluator_id) + description = post_info.get("description", "") + + # Convert evaluator name to proper endpoint format + # Use the evaluator name and convert to kebab-case + endpoint_id = summary.lower() + # Replace spaces and special characters with hyphens + endpoint_id = endpoint_id.replace(" ", "-") + endpoint_id = endpoint_id.replace("_", "-") + endpoint_id = endpoint_id.replace("/", "-") + # Remove any non-alphanumeric characters except hyphens + import re + + endpoint_id = re.sub(r"[^a-z0-9\-]", "", endpoint_id) + # Remove multiple consecutive hyphens + endpoint_id = re.sub(r"-+", "-", endpoint_id) + # Remove leading/trailing hyphens + endpoint_id = endpoint_id.strip("-") + + evaluator_info = { + "id": evaluator_id, + "name": summary, + "description": description, + "endpoint": f"/api-reference/{endpoint_id}", + } + + # Categorize based on path and description + if any( + keyword in evaluator_id.lower() + for keyword in [ + "exact_match", + "llm_answer_match", + "factual", + "sql_query", + "rouge", + "bleu", + ] + ): + categories["Expected Answer Evaluation"]["evaluators"].append( + evaluator_info + ) + elif any( + keyword in evaluator_id.lower() + for keyword in ["llm_boolean", "llm_score", "llm_category", "rubrics"] + ): + categories["LLM-as-Judge"]["evaluators"].append(evaluator_info) + elif any( + keyword in evaluator_id.lower() + for keyword in [ + "faithfulness", + "context_precision", + "context_recall", + "context_f1", + "response_relevancy", + "response_context", + ] + ): + categories["RAG Quality"]["evaluators"].append(evaluator_info) + elif any( + keyword in evaluator_id.lower() + for keyword in ["language_detection", "valid_format", "summarization"] + ): + categories["Quality Aspects Evaluation"]["evaluators"].append( + evaluator_info + ) + elif any( + keyword in evaluator_id.lower() + for keyword in [ + "pii", + "jailbreak", + "prompt_injection", + "content_safety", + "moderation", + "llama_guard", + ] + ): + categories["Safety"]["evaluators"].append(evaluator_info) + else: + categories["Other"]["evaluators"].append(evaluator_info) + + return categories + + +def generate_mdx(categories: Dict[str, Any]) -> str: + """Generate 
MDX content.""" + + mdx_content = [] + + for category_name, category_info in categories.items(): + if not category_info["evaluators"]: + continue + + mdx_content.append(f"## {category_name}") + mdx_content.append(f"{category_info['description']}") + mdx_content.append("") + mdx_content.append("| Evaluator | Description |") + mdx_content.append("| --------- | ----------- |") + + for evaluator in category_info["evaluators"]: + # Clean description to remove newlines but keep full text + desc = evaluator["description"] + # Remove newlines and normalize whitespace + desc = " ".join(desc.split()) + + mdx_content.append( + f"| [{evaluator['name']}]({evaluator['endpoint']}) | {desc} |" + ) + + mdx_content.append("") + + return "\n".join(mdx_content) + + +def main(): + """Main function to generate MDX list.""" + try: + # Find the openapi.json file + script_dir = Path(__file__).parent + openapi_file = script_dir / "openapi.json" + + if not openapi_file.exists(): + print(f"Error: Could not find OpenAPI file at {openapi_file}") + sys.exit(1) + + print(f"Reading OpenAPI from: {openapi_file}") + + with open(openapi_file, "r", encoding="utf-8") as f: + openapi_data = json.load(f) + + paths = openapi_data.get("paths", {}) + categories = categorize_evaluators(paths) + mdx_content = generate_mdx(categories) + + # Write the MDX content to a file + output_path = script_dir / "evaluators-list.mdx" + with open(output_path, "w", encoding="utf-8") as f: + f.write(mdx_content) + + print(f"MDX list generated successfully at: {output_path}") + + # Print summary + total_evaluators = sum(len(cat["evaluators"]) for cat in categories.values()) + active_categories = len( + [cat for cat in categories.values() if cat["evaluators"]] + ) + print( + f"Generated {total_evaluators} evaluators across {active_categories} categories" + ) + + except Exception as error: + print(f"Error generating MDX list: {error}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/generate_openapi.py b/scripts/generate_openapi.py index a3ef917..e134d83 100644 --- a/scripts/generate_openapi.py +++ b/scripts/generate_openapi.py @@ -672,7 +672,7 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: { "lang": "python", "label": "Offline Evaluation", - "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n\n # your execution code here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," if field != "expected_output" else f" \"{field}\": row[\"{field}\"]," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', + "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n # your execution code here \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": row[\"{field}\"]," if field in ["input", "contexts", "expected_output"] else f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python", @@ -745,7 +745,7 @@ def generate_openapi_schema(evaluators: Dict[str, Any]) -> Dict[str, Any]: { "lang": "python", "label": "Offline Evaluation", 
- "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n\n # your execution code here\n \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": {field}," if field != "expected_output" else f" \"{field}\": row[\"{field}\"]," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', + "source": f'import langwatch\n\ndf = langwatch.dataset.get_dataset("dataset-id").to_pandas()\n\nevaluation = langwatch.evaluation.init("my-incredible-evaluation")\n\nfor index, row in evaluation.loop(df.iterrows()):\n # your execution code here \n evaluation.run(\n "{evaluator_id}",\n index=index,\n data={{\n{chr(10).join(f" \"{field}\": row[\"{field}\"]," if field in ["input", "contexts", "expected_output"] else f" \"{field}\": {field}," for field in evaluator.get("requiredFields", []) + evaluator.get("optionalFields", []))}\n }},\n settings={{}}\n )\n', }, { "lang": "python",