diff --git a/biomni/agent/a1.py b/biomni/agent/a1.py
index b6c10d4c5..293830d3b 100644
--- a/biomni/agent/a1.py
+++ b/biomni/agent/a1.py
@@ -2,6 +2,8 @@
 import inspect
 import os
 import re
+import sys
+import time
 from collections.abc import Generator
 from pathlib import Path
 from typing import Any, Literal, TypedDict
@@ -51,6 +53,8 @@ def __init__(
         base_url: str | None = None,
         api_key: str | None = None,
         expected_data_lake_files: list | None = None,
+        trace_tracking: bool = False,
+        trace_output_dir: str = "evaluation_results/reasoning_traces",
     ):
         """Initialize the biomni agent.
 
@@ -62,6 +66,9 @@ def __init__(
             timeout_seconds: Timeout for code execution in seconds
             base_url: Base URL for custom model serving (e.g., "http://localhost:8000/v1")
             api_key: API key for the custom LLM
+            expected_data_lake_files: List of expected data lake files
+            trace_tracking: Whether to enable trace tracking for detailed reasoning reports (default: False)
+            trace_output_dir: Directory to save trace reports; only used when trace_tracking is True
 
         """
         # Use default_config values for unspecified parameters
@@ -171,6 +178,30 @@ def __init__(
 
         # Add timeout parameter
         self.timeout_seconds = timeout_seconds  # 10 minutes default timeout
+
+        # Initialize trace tracking (opt-in; the reporter is only created when it is enabled)
+        self.trace_tracking = trace_tracking
+        if self.trace_tracking:
+            # Select matplotlib's non-interactive "Agg" backend via MPLBACKEND to avoid GUI issues
+            os.environ["MPLBACKEND"] = "Agg"
+
+            # Import ReasoningTraceReporter here to avoid circular imports
+            from biomni.evaluation.reasoning_trace_reporter import ReasoningTraceReporter
+
+            self.trace_reporter = ReasoningTraceReporter(trace_output_dir)
+        else:
+            self.trace_reporter = None  # no reporter exists while tracing is disabled
+
+        # Enhanced logging for trace analysis; reset by go()/go_stream() when tracing is enabled
+        self.enhanced_log = []
+        self.current_query = None
+        self.execution_start_time = None
+
+        # Terminal output capture: remember the streams that were active at construction time
+        self.terminal_output_buffer = []
+        self.original_stdout = sys.stdout
+        self.original_stderr = sys.stderr
+
         self.configure()
 
     def add_tool(self, api):
@@ -1528,24 +1559,48 @@ def go(self, prompt):
         Args:
             prompt: The user's query
 
+        Returns:
+            Tuple of (log, final_message_content): the pretty-printed messages and the last message's content
         """
-        self.critic_count = 0
-        self.user_task = prompt
+        # Start trace reporting
+        if self.trace_tracking:
+            self.trace_reporter.start_trace(prompt)
+            self.current_query = prompt
+            self.execution_start_time = time.time()
+            self.enhanced_log = []
 
-        if self.use_tool_retriever:
-            selected_resources_names = self._prepare_resources_for_retrieval(prompt)
-            self.update_system_prompt_with_selected_resources(selected_resources_names)
+            # Add initial query to terminal output
+            self.trace_reporter.add_terminal_output(f"Query: {prompt}", "query")
 
-        inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
-        config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
-        self.log = []
+            # Start terminal capture
+            self._start_terminal_capture()
 
-        for s in self.app.stream(inputs, stream_mode="values", config=config):
-            message = s["messages"][-1]
-            out = pretty_print(message)
-            self.log.append(out)
+        try:  # guarantees the finally block below stops terminal capture on any exit path
+            self.critic_count = 0
+            self.user_task = prompt
+
+            if self.use_tool_retriever:
+                selected_resources_names = self._prepare_resources_for_retrieval(prompt)
+                self.update_system_prompt_with_selected_resources(selected_resources_names)
+
+            inputs = {"messages": [HumanMessage(content=prompt)], "next_step": None}
+            config = {"recursion_limit": 500, "configurable": {"thread_id": 42}}
+            self.log = []
+
+            for s in self.app.stream(inputs, stream_mode="values", config=config):
+                message = s["messages"][-1]
+                out = pretty_print(message)
+                self.log.append(out)
+
+            # Process trace reporting once the stream is exhausted, using the last streamed message
+            if self.trace_tracking:
+                self._process_trace_reporting(self.log, message.content)
 
-        return self.log, message.content
+            return self.log, message.content
+
+        finally:
+            # Stop terminal capture; runs even when tracing is off (NOTE(review): confirm that is safe)
+            self._stop_terminal_capture()
 
     def go_stream(self, prompt) -> Generator[dict, None, None]:
         """Execute the agent with the given prompt and return a generator that yields each step.
@@ -1559,6 +1614,13 @@ def go_stream(self, prompt) -> Generator[dict, None, None]: Yields: dict: Each step of the agent's execution containing the current message and state """ + # Start trace reporting + if self.trace_tracking: + self.trace_reporter.start_trace(prompt) + self.current_query = prompt + self.execution_start_time = time.time() + self.enhanced_log = [] + self.critic_count = 0 self.user_task = prompt @@ -1575,9 +1637,17 @@ def go_stream(self, prompt) -> Generator[dict, None, None]: out = pretty_print(message) self.log.append(out) + # Add to enhanced log for trace analysis + if self.trace_tracking: + self.enhanced_log.append(out) + # Yield the current step yield {"output": out} + # Process trace reporting after completion + if self.trace_tracking: + self._process_trace_reporting(self.enhanced_log, message.content) + def update_system_prompt_with_selected_resources(self, selected_resources): """Update the system prompt with the selected resources.""" # Extract tool descriptions for the selected tools @@ -1726,6 +1796,87 @@ def _inject_custom_functions_to_repl(self): builtins._biomni_custom_functions = {} builtins._biomni_custom_functions.update(self._custom_functions) + # Add custom plot saving function if trace tracking is enabled + if self.trace_tracking and self.trace_reporter: + import builtins + from datetime import datetime + + import matplotlib.pyplot as plt + + from biomni.tool.support_tools import _persistent_namespace + + # Store original savefig if not already stored + if not hasattr(plt, "_original_savefig"): + plt._original_savefig = plt.savefig + + # Override plt.savefig to save to query folder + def custom_savefig(*args, **kwargs): + """ + Override plt.savefig to save to query folder when trace tracking is enabled. + This ensures all plots are saved in the query-specific directory. 
+ """ + if self.trace_tracking and self.trace_reporter: + # Determine filename + if args and isinstance(args[0], str): + filename = args[0] + other_args = args[1:] + else: + # Generate filename based on number of existing plots + plot_count = len(self.trace_reporter.trace_data["generated_plots"]) + 1 + filename = f"plot_{plot_count}.png" + other_args = args + + # Ensure filename has .png extension + if not filename.endswith(".png"): + filename += ".png" + + # Save to query folder + plot_path = self.trace_reporter.query_folder / filename + result = plt._original_savefig(plot_path, *other_args, **kwargs) + + # Add to generated plots list for final report + plot_info = { + "name": filename.replace(".png", ""), + "path": str(plot_path), + "timestamp": datetime.now().isoformat(), + } + self.trace_reporter.trace_data["generated_plots"].append(plot_info) + + print(f"Plot saved to query folder: {plot_path}") + return result + else: + # Fall back to original savefig + return plt._original_savefig(*args, **kwargs) + + # Replace plt.savefig with our custom version + plt.savefig = custom_savefig + + # Inject the modified plt into the namespace + _persistent_namespace["plt"] = plt + + # Also provide a convenience function + def save_plot_to_query_folder(filename=None, **kwargs): + """ + Convenience function to save plot to query folder. + This is equivalent to plt.savefig() when trace tracking is enabled. 
+ + Args: + filename: Optional filename for the plot + **kwargs: Additional arguments to pass to plt.savefig() + """ + if filename: + return plt.savefig(filename, **kwargs) + else: + return plt.savefig(**kwargs) + + # Inject the convenience function + _persistent_namespace["save_plot_to_query_folder"] = save_plot_to_query_folder + + # Also make it available in builtins + if not hasattr(builtins, "_biomni_custom_functions"): + builtins._biomni_custom_functions = {} + builtins._biomni_custom_functions["save_plot_to_query_folder"] = save_plot_to_query_folder + def create_mcp_server(self, tool_modules=None): """ Create an MCP server object that exposes internal Biomni tools. @@ -1877,3 +2028,271 @@ def wrapper(**kwargs) -> dict: wrapper.__signature__ = inspect.Signature(new_params, return_annotation=dict) return wrapper + + def _start_terminal_capture(self): + """Start capturing terminal output.""" + if self.trace_tracking: + self.terminal_output_buffer = [] + + # Create a custom stdout that captures output + class CapturingStdout: + def __init__(self, original_stdout, buffer, reporter): + self.original_stdout = original_stdout + self.buffer = buffer + self.reporter = reporter + + def write(self, text): + self.original_stdout.write(text) + self.buffer.append(text) + if self.reporter and hasattr(self.reporter, "add_terminal_output"): + self.reporter.add_terminal_output(text, "stdout") + + def flush(self): + self.original_stdout.flush() + + sys.stdout = CapturingStdout(self.original_stdout, self.terminal_output_buffer, self.trace_reporter) + + def _stop_terminal_capture(self): + """Stop capturing terminal output.""" + if self.trace_tracking: + sys.stdout = self.original_stdout + sys.stderr = self.original_stderr + + def _process_trace_reporting(self, log: list[Any], final_content: str): + """ + Process the execution log to generate trace report. 
+ + Args: + log: The execution log from the agent + final_content: The final content returned by the agent + """ + if not self.trace_tracking or not self.trace_reporter: + return + + # Parse the log to extract trace information + self.trace_reporter.parse_agent_log(log) + + # Add performance metrics + if self.execution_start_time: + execution_time = time.time() - self.execution_start_time + self.trace_reporter.trace_data["performance_metrics"]["total_execution_time"] = execution_time + + # End the trace + self.trace_reporter.end_trace(final_content) + + def generate_trace_report(self, filename: str | None = None) -> str: + """ + Generate an HTML trace report for the last execution. + + Args: + filename: Optional filename for the report + + Returns: + Path to the generated HTML file + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + return self.trace_reporter.generate_html_report(filename) + + def generate_final_user_report(self, filename: str | None = None) -> str: + """ + Generate a clean, final user report with plots and evidence. + + Args: + filename: Optional filename for the report + + Returns: + Path to the generated HTML file + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + return self.trace_reporter.generate_final_user_report(filename) + + def capture_plot(self, plot_name: str = None) -> str: + """ + Capture the current matplotlib plot and save it. + + Args: + plot_name: Optional name for the plot file + + Returns: + Path to the saved plot file + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + return self.trace_reporter.capture_plot(plot_name) + + def set_final_result(self, result: str): + """ + Set the final result for the query. 
+ + Args: + result: The final result text + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + self.trace_reporter.set_final_result(result) + + def capture_current_plot(self, plot_name: str = None) -> str: + """ + Capture the current plot and save it to the query folder. + This method can be called from within the agent's execution. + + Args: + plot_name: Optional name for the plot file + + Returns: + Path to the saved plot file + """ + if not self.trace_tracking or not self.trace_reporter: + return None + + try: + import matplotlib.pyplot as plt + + # Check if there's a current figure + if not plt.get_fignums(): + print("Warning: No active plot to capture") + return None + + # Capture the current figure + if not plot_name: + plot_name = f"plot_{len(self.trace_reporter.trace_data['generated_plots']) + 1}" + + return self.capture_plot(plot_name) + + except Exception as e: + print(f"Warning: Could not capture current plot: {e}") + return None + + def save_complete_terminal_output(self, filename: str | None = None) -> str: + """ + Save the complete terminal output to a text file. + + Args: + filename: Optional filename for the output file + + Returns: + Path to the saved text file + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + return self.trace_reporter.save_complete_terminal_output(filename) + + def get_trace_data(self) -> dict[str, Any]: + """ + Get the current trace data for analysis. + + Returns: + Dictionary containing trace data + """ + if not self.trace_tracking or not self.trace_reporter: + return {} + + return self.trace_reporter.trace_data + + def add_custom_trace_step(self, step_type: str, content: Any, metadata: dict | None = None): + """ + Add a custom step to the trace for additional analysis. 
+ + Args: + step_type: Type of the step + content: Content of the step + metadata: Additional metadata + """ + if self.trace_tracking and self.trace_reporter: + self.trace_reporter.add_step(step_type, content, metadata) + + def analyze_tool_usage_patterns(self) -> dict[str, Any]: + """ + Analyze tool usage patterns from the trace data. + + Returns: + Dictionary containing analysis results + """ + if not self.trace_tracking or not self.trace_reporter: + return {} + + trace_data = self.trace_reporter.trace_data + analysis = { + "total_tool_calls": len(trace_data.get("tool_calls", [])), + "tool_usage_frequency": {}, + "code_execution_frequency": {}, + "reasoning_steps_breakdown": {}, + "performance_metrics": trace_data["performance_metrics"], + } + + # Analyze tool usage frequency + for tool_call in trace_data.get("tool_calls", []): + tool_name = tool_call["tool_name"] + analysis["tool_usage_frequency"][tool_name] = analysis["tool_usage_frequency"].get(tool_name, 0) + 1 + + # Analyze code execution patterns + for code_exec in trace_data["code_executions"]: + code_type = "generated" if code_exec["is_generated"] else "pre_written" + analysis["code_execution_frequency"][code_type] = analysis["code_execution_frequency"].get(code_type, 0) + 1 + + # Analyze reasoning steps + for step in trace_data["steps"]: + step_type = step["type"] + analysis["reasoning_steps_breakdown"][step_type] = ( + analysis["reasoning_steps_breakdown"].get(step_type, 0) + 1 + ) + + # Count code generation steps separately + if step_type == "code_generation": + analysis["code_execution_frequency"]["generated"] = ( + analysis["code_execution_frequency"].get("generated", 0) + 1 + ) + + return analysis + + def export_trace_data(self, format: str = "json", filename: str | None = None) -> str: + """ + Export trace data in various formats for further analysis. 
+ + Args: + format: Export format ('json', 'csv', 'pickle') + filename: Optional filename for export + + Returns: + Path to the exported file + """ + if not self.trace_tracking or not self.trace_reporter: + raise RuntimeError("Trace tracking is not enabled") + + import json + import pickle + from datetime import datetime + + import pandas as pd + + if not filename: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"trace_data_{timestamp}.{format}" + + # Use query_folder if available, otherwise fall back to output_dir + if hasattr(self.trace_reporter, "query_folder") and self.trace_reporter.query_folder: + filepath = self.trace_reporter.query_folder / filename + else: + filepath = self.trace_reporter.output_dir / filename + + if format == "json": + with open(filepath, "w", encoding="utf-8") as f: + json.dump(self.trace_reporter.trace_data, f, indent=2, default=str) + elif format == "csv": + # Convert steps to DataFrame + steps_df = pd.DataFrame(self.trace_reporter.trace_data["steps"]) + steps_df.to_csv(filepath, index=False) + elif format == "pickle": + with open(filepath, "wb") as f: + pickle.dump(self.trace_reporter.trace_data, f) + else: + raise ValueError(f"Unsupported format: {format}") + + return str(filepath) diff --git a/biomni/evaluation/__init__.py b/biomni/evaluation/__init__.py new file mode 100644 index 000000000..72d8728b0 --- /dev/null +++ b/biomni/evaluation/__init__.py @@ -0,0 +1,10 @@ +""" +Evaluation module for Biomni + +This module provides tools and utilities for evaluating biomni's performance, +reasoning trace analysis, and detailed reporting capabilities. 
+""" + +from .reasoning_trace_reporter import ReasoningTraceReporter + +__all__ = ["ReasoningTraceReporter"] diff --git a/biomni/evaluation/reasoning_trace_reporter.py b/biomni/evaluation/reasoning_trace_reporter.py new file mode 100644 index 000000000..79a88fa78 --- /dev/null +++ b/biomni/evaluation/reasoning_trace_reporter.py @@ -0,0 +1,1306 @@ +""" +Reasoning Trace Reporter for Biomni + +This module provides functionality to generate detailed HTML reports of biomni's reasoning trace, +including all tool calls, code execution, and reasoning steps for evaluation purposes. +""" + +import os +import re +from datetime import datetime +from pathlib import Path +from typing import Any + +from jinja2 import Template + + +class ReasoningTraceReporter: + """ + A class to generate detailed HTML reports of biomni's reasoning trace. + + This reporter captures: + - User queries and system responses + - Tool calls and their parameters + - Code execution (both generated and called) + - Reasoning steps and thought processes + - Execution timing and performance metrics + """ + + def __init__(self, output_dir: str = "evaluation_results/reasoning_traces"): + """ + Initialize the reasoning trace reporter. 
+ + Args: + output_dir: Directory to save HTML reports + """ + self.output_dir = Path(output_dir) + self.output_dir.mkdir(parents=True, exist_ok=True) + + # Trace data structure + self.trace_data = { + "query": "", + "start_time": None, + "end_time": None, + "steps": [], + "code_executions": [], + "performance_metrics": {}, + "complete_terminal_output": [], # Store complete terminal output + "generated_plots": [], # Store generated plots + "final_result": "", # Store final result + } + + # HTML template for the report + self.html_template = self._get_html_template() + + def start_trace(self, query: str): + """Start tracing a new query execution.""" + # Create query-specific subfolder + query_slug = re.sub(r"[^a-zA-Z0-9]", "_", query[:50]) + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.query_folder = self.output_dir / f"query_{query_slug}_{timestamp}" + self.query_folder.mkdir(parents=True, exist_ok=True) + + self.trace_data = { + "query": query, + "start_time": datetime.now(), + "end_time": None, + "steps": [], + "code_executions": [], + "performance_metrics": {}, + "complete_terminal_output": [], # Initialize terminal output storage + "generated_plots": [], # Store generated plots + "final_result": "", # Store final result + } + + def add_step(self, step_type: str, content: Any, metadata: dict | None = None): + """ + Add a step to the reasoning trace. + + Args: + step_type: Type of step (e.g., 'thinking', 'tool_call', 'code_execution', 'observation') + content: Content of the step + metadata: Additional metadata about the step + """ + step = { + "type": step_type, + "content": content, + "metadata": metadata or {}, + "timestamp": datetime.now().isoformat(), + } + self.trace_data["steps"].append(step) + + def add_code_execution(self, code: str, result: Any, execution_time: float = None, is_generated: bool = True): + """ + Add a code execution to the trace. 
+ + Args: + code: Code that was executed + result: Result of the execution + execution_time: Time taken for execution + is_generated: Whether the code was generated on-the-fly + """ + code_execution = { + "code": code, + "result": result, + "execution_time": execution_time, + "is_generated": is_generated, + "timestamp": datetime.now().isoformat(), + } + self.trace_data["code_executions"].append(code_execution) + self.trace_data["steps"].append( + { + "type": "code_execution", + "content": code_execution, + "metadata": {}, + "timestamp": datetime.now().isoformat(), + } + ) + + def end_trace(self, final_result: Any = None): + """End the current trace and calculate performance metrics.""" + self.trace_data["end_time"] = datetime.now() + + if self.trace_data["start_time"] and self.trace_data["end_time"]: + total_time = (self.trace_data["end_time"] - self.trace_data["start_time"]).total_seconds() + self.trace_data["performance_metrics"]["total_execution_time"] = total_time + self.trace_data["performance_metrics"]["total_steps"] = len(self.trace_data["steps"]) + self.trace_data["performance_metrics"]["total_code_executions"] = len(self.trace_data["code_executions"]) + + if final_result: + self.trace_data["final_result"] = final_result + + def parse_agent_log(self, log: list[Any]): + """ + Parse an agent log to extract reasoning trace information. 
+ + Args: + log: List of log entries from the agent + """ + for i, log_entry in enumerate(log): + if isinstance(log_entry, str): + # Parse different types of log entries + self._parse_log_entry(log_entry, i) + + def _parse_log_entry(self, log_entry: str, step_index: int): + """Parse a single log entry to extract trace information.""" + + # Clean up the log entry - handle escaped characters and formatting + cleaned_entry = self._clean_log_entry(log_entry) + + # First, check for structured content (think, execute, solution blocks) + if "" in cleaned_entry and "" in cleaned_entry: + self._extract_structured_thinking(cleaned_entry, step_index) + elif "" in cleaned_entry and "" in cleaned_entry: + self._extract_code_execution(cleaned_entry, step_index) + elif "" in cleaned_entry and "" in cleaned_entry: + self._extract_solution(cleaned_entry, step_index) + + # Look for run_python_repl tool calls + elif "run_python_repl" in cleaned_entry: + self._extract_code_execution(cleaned_entry, step_index) + + # Look for observations (tool results) + elif "" in cleaned_entry: + self._extract_observation(cleaned_entry, step_index) + + # Look for errors (including matplotlib GUI errors) + elif any( + error_marker in cleaned_entry + for error_marker in [ + "NSInternalInconsistencyException", + "NSWindow", + "libc++abi: terminating", + "matplotlib", + "GUI", + "thread", + "abort", + ] + ): + self._extract_error(cleaned_entry, step_index) + + # Look for planning and checklist patterns + elif any( + planning_marker in cleaned_entry.lower() + for planning_marker in ["plan checklist", "updated plan", "step", "checklist", "plan:", "steps:"] + ): + self._extract_planning(cleaned_entry, step_index) + + # Look for thinking/reasoning patterns in unstructured text + elif any( + thinking_marker in cleaned_entry.lower() + for thinking_marker in [ + "i need to", + "let me", + "first, i'll", + "i should", + "i think", + "i believe", + "based on", + "considering", + "looking at", + "analyzing", + 
"examining", + ] + ): + self._extract_thinking(cleaned_entry, step_index) + + # Look for code blocks that might be generated + elif "```python" in cleaned_entry or "```" in cleaned_entry: + self._extract_code_generation(cleaned_entry, step_index) + + # Look for function definitions and code structures + elif any( + code_marker in cleaned_entry + for code_marker in ["def ", "import ", "class ", "for ", "if ", "while ", "try:", "except:"] + ): + self._extract_code_generation(cleaned_entry, step_index) + + # Add as general step if not categorized + else: + self.add_step("general", cleaned_entry, {"step_index": step_index}) + + def _clean_log_entry(self, log_entry: str) -> str: + """Clean up log entry by handling escaped characters and formatting.""" + import html + + # Decode HTML entities + cleaned = html.unescape(log_entry) + + # Handle common escaped characters + cleaned = cleaned.replace("\\n", "\n") + cleaned = cleaned.replace("\\t", "\t") + cleaned = cleaned.replace("\\r", "\r") + cleaned = cleaned.replace("\\'", "'") + cleaned = cleaned.replace('\\"', '"') + + # Handle unicode escapes + cleaned = cleaned.replace("\\u0027", "'") + cleaned = cleaned.replace("\\u0026", "&") + cleaned = cleaned.replace("\\u003c", "<") + cleaned = cleaned.replace("\\u003e", ">") + + return cleaned + + def _extract_planning(self, log_entry: str, step_index: int): + """Extract planning and checklist information from log entry.""" + # Look for planning patterns + planning_patterns = [ + r"### (?:Updated )?Plan.*?(?=\n\n|\n###|\Z)", + r"### Plan Checklist.*?(?=\n\n|\n###|\Z)", + r"### Steps.*?(?=\n\n|\n###|\Z)", + r"1\. 
\[.*?\].*?(?=\n\n|\n###|\Z)", + ] + + for pattern in planning_patterns: + match = re.search(pattern, log_entry, re.DOTALL | re.IGNORECASE) + if match: + planning_content = match.group(0).strip() + self.add_step( + "planning", + {"content": planning_content, "type": "planning", "full_context": log_entry}, + {"step_index": step_index}, + ) + return + + # If no specific pattern found, add as planning if it contains planning keywords + if any(keyword in log_entry.lower() for keyword in ["plan", "step", "checklist", "next"]): + self.add_step( + "planning", + {"content": log_entry, "type": "planning", "full_context": log_entry}, + {"step_index": step_index}, + ) + + def _extract_code_execution(self, log_entry: str, step_index: int): + """Extract code execution information from log entry.""" + execute_match = re.search(r"(.*?)", log_entry, re.DOTALL) + if execute_match: + code = execute_match.group(1).strip() + + # Check for matplotlib errors and provide solutions + execution_result = "Executed via block" + if "plt.show()" in code and ("NSWindow" in log_entry or "NSInternalInconsistencyException" in log_entry): + execution_result = "Executed via block (matplotlib GUI error - use non-GUI backend)" + elif "NSWindow" in log_entry or "NSInternalInconsistencyException" in log_entry: + execution_result = "Executed via block (GUI threading error)" + + self.add_step( + "code_execution", + {"code": code, "is_generated": True, "execution_result": execution_result}, + {"step_index": step_index}, + ) + + # Also add to code_executions list for summary + self.trace_data["code_executions"].append( + { + "code": code, + "result": execution_result, + "execution_time": None, + "is_generated": True, + "timestamp": self.trace_data["steps"][-1]["timestamp"], + } + ) + elif "run_python_repl" in log_entry: + # Extract code from run_python_repl tool calls + code_match = re.search(r'command["\']?\s*:\s*["\']([^"\']+)["\']', log_entry) + if code_match: + code = code_match.group(1).strip() + + # 
Check for matplotlib errors + execution_result = "Executed via run_python_repl" + if "plt.show()" in code and ( + "NSWindow" in log_entry or "NSInternalInconsistencyException" in log_entry + ): + execution_result = "Executed via run_python_repl (matplotlib GUI error - use non-GUI backend)" + + self.add_step( + "code_execution", + { + "code": code, + "is_generated": True, + "tool": "run_python_repl", + "execution_result": execution_result, + }, + {"step_index": step_index}, + ) + + # Also add to code_executions list for summary + self.trace_data["code_executions"].append( + { + "code": code, + "result": execution_result, + "execution_time": None, + "is_generated": True, + "timestamp": self.trace_data["steps"][-1]["timestamp"], + } + ) + + def _extract_observation(self, log_entry: str, step_index: int): + """Extract observation (tool result) information from log entry.""" + obs_match = re.search(r"(.*?)", log_entry, re.DOTALL) + if obs_match: + result = obs_match.group(1).strip() + self.add_step( + "observation", + {"content": result, "type": "tool_result", "full_context": log_entry}, + {"step_index": step_index}, + ) + else: + # Look for other observation patterns + if "result:" in log_entry.lower() or "output:" in log_entry.lower(): + self.add_step( + "observation", + {"content": log_entry, "type": "general_result", "full_context": log_entry}, + {"step_index": step_index}, + ) + else: + self.add_step( + "observation", + {"content": log_entry, "type": "general", "full_context": log_entry}, + {"step_index": step_index}, + ) + + def _extract_structured_thinking(self, log_entry: str, step_index: int): + """Extract structured thinking from tags.""" + think_match = re.search(r"(.*?)", log_entry, re.DOTALL) + if think_match: + thinking = think_match.group(1).strip() + self.add_step( + "thinking", + {"content": thinking, "type": "structured", "full_context": log_entry}, + {"step_index": step_index}, + ) + else: + self.add_step( + "thinking", + {"content": log_entry, 
"type": "unstructured", "full_context": log_entry}, + {"step_index": step_index}, + ) + + def _extract_thinking(self, log_entry: str, step_index: int): + """Extract thinking/reasoning information from unstructured log entry.""" + self.add_step( + "thinking", + {"content": log_entry, "type": "unstructured", "full_context": log_entry}, + {"step_index": step_index}, + ) + + def _extract_solution(self, log_entry: str, step_index: int): + """Extract solution information from tags.""" + solution_match = re.search(r"(.*?)", log_entry, re.DOTALL) + if solution_match: + solution = solution_match.group(1).strip() + self.add_step("solution", {"content": solution, "full_context": log_entry}, {"step_index": step_index}) + else: + self.add_step("solution", {"content": log_entry, "full_context": log_entry}, {"step_index": step_index}) + + def _extract_error(self, log_entry: str, step_index: int): + """Extract error information from log entry.""" + error_type = "Unknown Error" + error_message = log_entry + + if "NSInternalInconsistencyException" in log_entry and "NSWindow" in log_entry: + error_type = "Matplotlib GUI Error" + error_message = "GUI window creation failed - use non-GUI backend (e.g., 'Agg')" + elif "matplotlib" in log_entry.lower(): + error_type = "Matplotlib Error" + elif "thread" in log_entry.lower(): + error_type = "Threading Error" + elif "abort" in log_entry.lower(): + error_type = "Process Abort" + + self.add_step( + "error", + {"error_type": error_type, "error_message": error_message, "full_log": log_entry}, + {"step_index": step_index}, + ) + + def _extract_code_generation(self, log_entry: str, step_index: int): + """Extract code generation information from log entry.""" + # Look for Python code blocks + code_match = re.search(r"```python\s*(.*?)\s*```", log_entry, re.DOTALL) + if code_match: + code = code_match.group(1).strip() + self.add_step( + "code_generation", + {"code": code, "is_generated": True, "context": log_entry}, + {"step_index": step_index}, + 
) + else: + # Look for any code block + code_match = re.search(r"```\s*(.*?)\s*```", log_entry, re.DOTALL) + if code_match: + code = code_match.group(1).strip() + self.add_step( + "code_generation", + {"code": code, "is_generated": True, "context": log_entry}, + {"step_index": step_index}, + ) + else: + self.add_step( + "code_generation", + {"code": log_entry, "is_generated": True, "context": "Code generation step"}, + {"step_index": step_index}, + ) + + def generate_html_report(self, filename: str | None = None) -> str: + """ + Generate an HTML report from the current trace data. + + Args: + filename: Optional filename for the report + + Returns: + Path to the generated HTML file + """ + if not filename: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + query_slug = re.sub(r"[^a-zA-Z0-9]", "_", self.trace_data["query"][:50]) + filename = f"reasoning_trace_{query_slug}_{timestamp}.html" + + filepath = self.query_folder / filename + + # Prepare data for template + template_data = { + "query": self.trace_data["query"], + "start_time": self.trace_data["start_time"].isoformat() if self.trace_data["start_time"] else "", + "end_time": self.trace_data["end_time"].isoformat() if self.trace_data["end_time"] else "", + "total_steps": len(self.trace_data["steps"]), + "total_code_executions": len(self.trace_data["code_executions"]), + "performance_metrics": self.trace_data["performance_metrics"], + "steps": self.trace_data["steps"], + "code_executions": self.trace_data["code_executions"], + "final_result": self.trace_data.get("final_result", ""), + "generated_plots": self.trace_data.get("generated_plots", []), + } + + # Render template + template = Template(self.html_template) + html_content = template.render(**template_data) + + # Write to file + with open(filepath, "w", encoding="utf-8") as f: + f.write(html_content) + + return str(filepath) + + def add_terminal_output(self, output: str, output_type: str = "general"): + """ + Add terminal output to the trace. 
+ + Args: + output: The terminal output text + output_type: Type of output (e.g., 'planning', 'execution', 'result', 'error') + """ + terminal_entry = {"content": output, "type": output_type, "timestamp": datetime.now().isoformat()} + self.trace_data["complete_terminal_output"].append(terminal_entry) + + def save_complete_terminal_output(self, filename: str | None = None) -> str: + """ + Save the complete terminal output to a text file. + + Args: + filename: Optional filename for the output file + + Returns: + Path to the saved text file + """ + if not filename: + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + query_slug = re.sub(r"[^a-zA-Z0-9]", "_", self.trace_data["query"][:50]) + filename = f"complete_output_{query_slug}_{timestamp}.txt" + + filepath = self.query_folder / filename + + with open(filepath, "w", encoding="utf-8") as f: + f.write("=" * 80 + "\n") + f.write("COMPLETE TERMINAL OUTPUT - BIOMNI REASONING TRACE\n") + f.write("=" * 80 + "\n\n") + + f.write(f"Query: {self.trace_data['query']}\n") + f.write(f"Start Time: {self.trace_data['start_time']}\n") + f.write(f"End Time: {self.trace_data['end_time']}\n") + f.write( + f"Total Execution Time: {self.trace_data['performance_metrics'].get('total_execution_time', 'N/A')} seconds\n" + ) + f.write("\n" + "=" * 80 + "\n\n") + + for i, output_entry in enumerate(self.trace_data["complete_terminal_output"], 1): + f.write(f"[{i}] {output_entry['timestamp']} - {output_entry['type'].upper()}\n") + f.write("-" * 40 + "\n") + f.write(output_entry["content"]) + f.write("\n\n") + + return str(filepath) + + def capture_plot(self, plot_name: str = None): + """ + Capture the current matplotlib plot and save it. 
+ + Args: + plot_name: Optional name for the plot file + """ + try: + import matplotlib.pyplot as plt + + if not plot_name: + plot_name = f"plot_{len(self.trace_data['generated_plots']) + 1}" + + # Save plot to query folder + plot_path = self.query_folder / f"{plot_name}.png" + plt.savefig(plot_path, dpi=300, bbox_inches="tight") + + # Store plot information + plot_info = {"name": plot_name, "path": str(plot_path), "timestamp": datetime.now().isoformat()} + self.trace_data["generated_plots"].append(plot_info) + + return str(plot_path) + + except Exception as e: + print(f"Warning: Could not capture plot: {e}") + return None + + def set_final_result(self, result: str): + """Set the final result for the query.""" + self.trace_data["final_result"] = result + + def generate_final_user_report(self, filename: str | None = None) -> str: + """ + Generate a clean, final user report with plots and evidence. + + Args: + filename: Optional filename for the report + + Returns: + Path to the generated HTML file + """ + if not filename: + filename = "final_user_report.html" + + filepath = self.query_folder / filename + + # Generate the HTML content + html_content = self._get_final_report_template() + + # Write to file + with open(filepath, "w", encoding="utf-8") as f: + f.write(html_content) + + return str(filepath) + + def _get_final_report_template(self) -> str: + """Get the HTML template for the final user report.""" + return f""" + + + + + + Final Report - {self.trace_data["query"][:50]}... + + + + + + +
+
+

🧬 Final Analysis Report

+

Comprehensive analysis and findings from biomni reasoning system

+
+ +
+
+

📋 Query

+
+ {self.trace_data["query"]} +
+
+ +
+

📊 Analysis Summary

+
+
+
{len(self.trace_data["steps"])}
+
Total Steps
+
+ +
+
{len(self.trace_data["code_executions"])}
+
Code Executions
+
+
+
{len(self.trace_data["generated_plots"])}
+
Generated Plots
+
+
+
+ +
+

🎯 Final Results

+
+ {self._render_markdown_content(self.trace_data["final_result"]) if self.trace_data["final_result"] else "Results will be displayed here after analysis completion."} +
+
+ +
+

📈 Generated Visualizations

+ {self._generate_plots_html()} +
+
+ + +
+ + + """ + + def _generate_plots_html(self) -> str: + """Generate HTML for plots section.""" + if not self.trace_data["generated_plots"]: + return "

No plots were generated during this analysis.

" + + plots_html = "" + for plot in self.trace_data["generated_plots"]: + # Use relative path for the plot image + plot_filename = os.path.basename(plot["path"]) + plots_html += f""" +
+
{plot["name"].replace("_", " ").title()}
+ {plot[ +

Generated at: {plot["timestamp"]}

+
+ """ + + return plots_html + + def _render_markdown_content(self, content: str) -> str: + """ + Convert markdown content to HTML with proper formatting. + + Args: + content: Markdown content to render + + Returns: + HTML content with proper formatting + """ + if not content: + return "" + + # Basic markdown to HTML conversion + html = content + + # Headers + html = re.sub(r"^### (.*?)$", r"

\1

", html, flags=re.MULTILINE) + html = re.sub(r"^## (.*?)$", r"

\1

", html, flags=re.MULTILINE) + html = re.sub(r"^# (.*?)$", r"

\1

", html, flags=re.MULTILINE) + + # Bold and italic + html = re.sub(r"\*\*(.*?)\*\*", r"\1", html) + html = re.sub(r"\*(.*?)\*", r"\1", html) + + # Code blocks + html = re.sub(r"```(.*?)```", r"
\1
", html, flags=re.DOTALL) + html = re.sub(r"`(.*?)`", r"\1", html) + + # Lists + html = re.sub(r"^\d+\. (.*?)$", r"
  • \1
  • ", html, flags=re.MULTILINE) + html = re.sub(r"^- (.*?)$", r"
  • \1
  • ", html, flags=re.MULTILINE) + + # Wrap consecutive list items in
      or
        + html = re.sub(r"(
      • .*?
      • \n?)+", lambda m: f"
          {m.group(0)}
        ", html, flags=re.DOTALL) + + # Line breaks + html = re.sub(r"\n\n", r"

        ", html) + html = re.sub(r"\n", r"
        ", html) + + # Wrap in paragraphs if not already wrapped + if not html.startswith("<"): + html = f"

        {html}

        " + + # Clean up empty paragraphs + html = re.sub(r"

        \s*

        ", "", html) + html = re.sub(r"


        ", "", html) + + return html + + def _get_html_template(self) -> str: + """Get the HTML template for the report.""" + return """ + + + + + + Biomni Reasoning Trace Report + + + +
        +
        +

        🧬 Biomni Reasoning Trace Report

        +
        Detailed analysis of AI reasoning and tool execution
        +
        + +
        + +
        +
        +

        šŸ” User Query

        +

        {{ query }}

        +
        +
        + + +
        +

        📊 Performance Metrics

        +
        +
        +
        {{ total_steps }}
        +
        Total Steps
        +
        +
        +
        {{ total_code_executions }}
        +
        Code Executions
        +
        +
        +
        {{ "%.2f"|format(performance_metrics.total_execution_time) if performance_metrics.total_execution_time else "N/A" }}
        +
        Execution Time (s)
        +
        +
        + +
        +
        +
        {{ start_time[:19] if start_time else "N/A" }}
        +
        Start Time
        +
        +
        +
        {{ end_time[:19] if end_time else "N/A" }}
        +
        End Time
        +
        +
        +
        + + +
        +

        🔄 Reasoning Steps

        + {% for step in steps %} +
        +
        +
        + {{ step.type.upper() }} + Step {{ loop.index }} +
        +
        {{ step.timestamp[:19] }}
        +
        +
        + {% if step.type == 'code_execution' %} +
        +

        💻 Code Execution {% if step.content.is_generated %}(Generated){% endif %}

        +
        {{ step.content.code }}
        + {% if step.content.tool %} +

        Tool: {{ step.content.tool }}

        + {% endif %} +
        + {% elif step.type == 'code_generation' %} +
        +

        šŸ“ Code Generation

        +
        {{ step.content.code }}
        + {% if step.content.context %} +

        Context: {{ step.content.context[:200] }}{% if step.content.context|length > 200 %}...{% endif %}

        + {% endif %} +
        + {% elif step.type == 'thinking' %} +
        +

        🧠 Reasoning {% if step.content.type == 'structured' %}(Structured){% else %}(Unstructured){% endif %}

        + {% if step.content.content %} +

        {{ step.content.content }}

        + {% else %} +

        {{ step.content }}

        + {% endif %} + {% if step.content.full_context and step.content.full_context != step.content.content %} +
        + Full Context +
        {{ step.content.full_context }}
        +
        + {% endif %} +
        + {% elif step.type == 'solution' %} +
        +

        ✅ Solution

        +

        {{ step.content.content }}

        + {% if step.content.full_context and step.content.full_context != step.content.content %} +
        + Full Context +
        {{ step.content.full_context }}
        +
        + {% endif %} +
        + {% elif step.type == 'planning' %} +
        +

        📋 Planning

        + {% if step.content.content %} +
        {{ step.content.content | safe }}
        + {% else %} +

        {{ step.content }}

        + {% endif %} + {% if step.content.full_context and step.content.full_context != step.content.content %} +
        + Full Context +
        {{ step.content.full_context }}
        +
        + {% endif %} +
        + {% elif step.type == 'observation' %} +
        +

        šŸ‘ļø Observation {% if step.content.type %}({{ step.content.type }}){% endif %}

        + {% if step.content.content %} +

        {{ step.content.content }}

        + {% else %} +

        {{ step.content }}

        + {% endif %} + {% if step.content.full_context and step.content.full_context != step.content.content %} +
        + Full Context +
        {{ step.content.full_context }}
        +
        + {% endif %} +
        + {% elif step.type == 'error' %} +
        +

        āŒ {{ step.content.error_type }}

        +

        Error: {{ step.content.error_message }}

        + {% if step.content.full_log %} +
        + Full Error Log +
        {{ step.content.full_log }}
        +
        + {% endif %} +
        + {% else %} +
        +

        šŸ“ {{ step.type.title() }}

        +

        {{ step.content }}

        +
        + {% endif %} +
        +
        + {% endfor %} +
        + + + + + +
        +
        + + + + + """ diff --git a/pyproject.toml b/pyproject.toml index 04201af4a..006178d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ authors = [ {name = "Biomni Team", email = "kexinh@cs.stanford.edu"} ] requires-python = ">=3.11" -dependencies = ["pydantic", "langchain", "python-dotenv"] +dependencies = ["pydantic", "langchain", "python-dotenv", "jinja2", "pandas"] [project.urls] Homepage = "https://github.com/snap-stanford/biomni" diff --git a/tutorials/reasoning_trace_demo.py b/tutorials/reasoning_trace_demo.py new file mode 100644 index 000000000..ccb38a253 --- /dev/null +++ b/tutorials/reasoning_trace_demo.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Reasoning Trace Demonstration +============================ + +This script demonstrates the reasoning trace functionality with example queries +that showcase biomni's ability to perform complex reasoning and generate +insightful visualizations. + +The queries are designed to: +- Demonstrate multi-step reasoning processes +- Generate visualizations and plots +- Show comprehensive reasoning capabilities +- Provide examples for different use cases +""" + +import os +import sys +from pathlib import Path + +# Set matplotlib backend to avoid GUI issues +os.environ["MPLBACKEND"] = "Agg" + +# Add the biomni package to the path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from biomni.agent.a1 import A1 +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# LLM configuration - using biomni tutorial style +# Users should configure their own LLM settings as needed + +# Example queries designed to demonstrate reasoning capabilities +example_queries = [ + # Query 1: PK/PD modeling example + { + "query": "Create a PK/PD model for bepirovirsen (GSK3228836) in chronic HBV infection using the following steps:\n\n1. First, create and save a plot showing plasma concentration over 12 weeks for 150mg vs 300mg weekly subcutaneous doses. 
Use literature values: half-life=28 days, volume of distribution=12L.\n\n2. Next, create and save a plot of HBV DNA suppression over time, using an Imax model with IC50=2.5 mg/L and baseline viral load of 5.5 log10 IU/mL.\n\n3. Finally, create and save a plot showing predicted HBsAg levels over time, assuming baseline=3.5 log10 IU/mL and an exponential relationship between drug concentration and antigen reduction.\n\nUse plt.savefig() to save each plot - it will automatically save to the query folder.", + "expected_tools": ["scipy", "matplotlib", "numpy"], + "expected_datasets": [], + "complexity": "High - requires PK/PD modeling, viral dynamics, and biomarker prediction", + }, + # Query 2: Gene regulatory network simulation + { + "query": "Simulate a simple 3-gene regulatory network with the following interactions:\n\n1. Gene A activates Gene B (activation constant = 0.8)\n2. Gene B inhibits Gene C (inhibition constant = 0.6)\n3. Gene C activates Gene A (activation constant = 0.7)\n\nCreate and save a plot showing the dynamics of all three genes over 50 time units, starting with initial concentrations [A=0.1, B=0.2, C=0.3]. Use a simple ODE model with Hill functions for the regulatory interactions.\n\nThen create and save a phase portrait showing the relationship between Gene A and Gene B concentrations over time.\n\nUse plt.savefig() to save each plot - it will automatically save to the query folder.", + "expected_tools": ["scipy", "matplotlib", "numpy"], + "expected_datasets": [], + "complexity": "Moderate - requires ODE modeling and phase space analysis", + }, + # Query 3: Cell population dynamics simulation + { + "query": "Simulate the growth dynamics of a cancer cell population under drug treatment:\n\n1. Model exponential growth of untreated cells (growth rate = 0.1 per day)\n2. Add drug treatment starting at day 10 with a cytotoxic effect (kill rate = 0.05 per day)\n3. 
Include drug resistance development (resistance rate = 0.01 per day)\n\nCreate and save a plot showing total cell count over 30 days, with separate curves for sensitive and resistant populations.\n\nThen create and save a plot showing the drug concentration over time, assuming first-order elimination (half-life = 8 hours) and daily dosing.\n\nUse plt.savefig() to save each plot - it will automatically save to the query folder.", + "expected_tools": ["scipy", "matplotlib", "numpy"], + "expected_datasets": [], + "complexity": "Moderate - requires population dynamics and pharmacokinetics", + }, +] + + +def main(): + """Run reasoning trace demonstration queries.""" + + print("🧬 Reasoning Trace Demonstration") + print("=" * 70) + print("This script demonstrates biomni's reasoning trace capabilities") + print("with example queries that showcase complex reasoning processes.") + print() + + # Initialize the agent with trace reporting + agent = A1( + path="./biomni_data", # Use biomni data directory + llm="claude-sonnet-4-20250514", # Use Claude Sonnet 4 + trace_tracking=True, # Enable trace tracking + trace_output_dir="evaluation_results/reasoning_trace_demo", + timeout_seconds=600, + ) + + print(f"\nšŸ“‹ Running {len(example_queries)} example queries...") + print("These queries demonstrate reasoning trace functionality.") + + for i, query_info in enumerate(example_queries, 1): + query = query_info["query"] + expected_tools = query_info["expected_tools"] + expected_datasets = query_info["expected_datasets"] + complexity = query_info["complexity"] + + print(f"\nšŸ” Query {i}: {complexity}") + print("-" * 70) + print(f"Query: {query}") + print(f"Expected tools: {', '.join(expected_tools)}") + print(f"Expected datasets: {', '.join(expected_datasets)}") + print("=" * 70) + + try: + # Execute the query with trace reporting + log, final_result = agent.go(query) + + # Set the final result for the final user report + agent.set_final_result(final_result) + + # Generate the 
detailed reasoning trace report + report_path = agent.generate_trace_report() + print(f"āœ… Generated detailed reasoning trace: {report_path}") + + # Generate the final user report + final_report_path = agent.generate_final_user_report() + print(f"šŸ“‹ Generated final user report: {final_report_path}") + + # Save complete terminal output + terminal_output_path = agent.save_complete_terminal_output() + print(f"šŸ“ Saved complete terminal output: {terminal_output_path}") + + # Get trace analysis + analysis = agent.analyze_tool_usage_patterns() + print("šŸ“Š Analysis:") + print(f" - Total steps: {len(analysis.get('reasoning_steps_breakdown', {}))}") + print(f" - Code executions: {analysis.get('code_execution_frequency', {}).get('generated', 0)}") + + # Export trace data for further analysis + json_path = agent.export_trace_data("json") + print(f"šŸ“ Exported trace data: {json_path}") + + print(f"\nšŸŽÆ Final result preview: {final_result[:200]}...") + + except Exception as e: + print(f"āŒ Error processing query: {e}") + continue + + print("\n" + "=" * 70) + + print(f"\nšŸŽÆ All reasoning trace demo reports saved to: {agent.trace_reporter.output_dir}") + print("šŸ“– Open the HTML files in your browser to view detailed reasoning traces!") + print("\nšŸ’” These reports will show:") + print(" - Complete reasoning process for each query") + print(" - Code generation and execution for complex analyses") + print(" - Results interpretation and conclusions") + print(" - Generated visualizations and plots") + + +def interactive_demo_mode(): + """Interactive mode for testing custom reasoning trace queries.""" + + print("\nšŸŽ® Interactive Reasoning Trace Demo Mode") + print("Enter your queries (type 'quit' to exit):") + print("šŸ’” Try queries that involve:") + print(" - Multi-step reasoning processes") + print(" - Data analysis and visualization") + print(" - Complex problem solving") + print(" - Code generation and execution") + + agent = A1( + path="./biomni_data", # Use 
biomni data directory + llm="claude-sonnet-4-20250514", # Use Claude Sonnet 4 + trace_tracking=True, # Enable trace tracking + trace_output_dir="evaluation_results/reasoning_trace_demo", + timeout_seconds=600, + ) + + while True: + try: + query = input("\nšŸ” Enter your query: ").strip() + + if query.lower() in ["quit", "exit", "q"]: + break + + if not query: + continue + + print(f"\nšŸš€ Processing query: {query[:100]}...") + + # Execute the query + log, final_result = agent.go(query) + + # Generate reports + agent.set_final_result(final_result) + report_path = agent.generate_trace_report() + final_report_path = agent.generate_final_user_report() + terminal_output_path = agent.save_complete_terminal_output() + json_path = agent.export_trace_data("json") + + print("āœ… Reports generated:") + print(f" - Detailed trace: {report_path}") + print(f" - Final report: {final_report_path}") + print(f" - Terminal output: {terminal_output_path}") + print(f" - Trace data: {json_path}") + + except KeyboardInterrupt: + print("\nšŸ‘‹ Goodbye!") + break + except Exception as e: + print(f"āŒ Error: {e}") + + +if __name__ == "__main__": + if len(sys.argv) > 1 and sys.argv[1] == "--interactive": + interactive_demo_mode() + else: + main()