61 changes: 61 additions & 0 deletions playground/README.md
@@ -0,0 +1,61 @@
# RLM Playground

The RLM Playground provides a web interface to interactively run RLM completions, visualize recursive calls, and debug execution across different environments.

## Architecture

- **Backend**: FastAPI server (`playground/server.py`) that executes RLM logic.
- **Frontend**: Next.js visualizer (`visualizer/`) with a dedicated playground page.

## Getting Started

### 1. Prerequisites

Ensure you have installed the project dependencies with the `playground` extra:

```bash
uv pip install -e ".[playground]"
```

### 2. Environment Setup

The playground server uses a `.env` file for configuration. Create one in the root directory:

```bash
# API Keys for LLM providers
OPENAI_API_KEY=your_key_here
ANTHROPIC_API_KEY=your_key_here

# Optional: Directory for RLM logs
RLM_LOG_DIR=./logs
```

### 3. Running the Backend Server

Start the FastAPI server. By default, it runs on `http://localhost:8000`.

```bash
uv run playground/run.py
```

The server includes:
- **CORS Middleware**: Pre-configured to allow requests from the visualizer (port 3000).
- **Auto-reload**: Enabled for development.
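
Once it is running, a quick sanity check is the health endpoint at the root path (the expected response is shown as a comment and assumes the default host and port above):

```bash
curl http://localhost:8000/
# {"status": "ok", "message": "RLM Playground API"}
```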

### 4. Running the Visualizer Frontend

Navigate to the visualizer directory and start the Next.js development server:

```bash
cd visualizer
bun install
bun run dev
```

Open [http://localhost:3000/playground](http://localhost:3000/playground) in your browser.

## Features

- **Configuration**: Switch between LM backends (OpenAI, Anthropic, Portkey, OpenRouter, vLLM, LiteLLM) and execution environments (Local, Prime, Modal).
- **Iterative Execution**: Set `max_iterations` to cap how many steps the root model takes and `max_depth` to cap recursive sub-calls (currently 0 or 1).
- **Usage Tracking**: View real-time token usage and execution time for every model call.
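
## Calling the API Directly

The visualizer drives the API for you, but the backend can also be called directly. A minimal request to the synchronous `/api/run` endpoint looks roughly like this (the model name is only an illustrative value; any omitted field falls back to the defaults in `playground/models.py`):

```bash
curl -X POST http://localhost:8000/api/run \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Summarize the plot of Hamlet in one sentence.",
    "backend": "openai",
    "backend_kwargs": {"model_name": "gpt-4o-mini"},
    "environment": "local",
    "max_iterations": 10,
    "max_depth": 1
  }'
```
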
Empty file added playground/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions playground/models.py
@@ -0,0 +1,58 @@
"""Pydantic models for RLM playground API requests and responses."""

from typing import Any, Literal

from pydantic import BaseModel, Field

ClientBackend = Literal["openai", "portkey", "openrouter", "vllm", "litellm", "anthropic"]
EnvironmentType = Literal["local", "prime", "modal"]


class RunRequest(BaseModel):
    """Request model for running RLM completion."""

    prompt: str | dict[str, Any] = Field(..., description="Main prompt/context for the RLM")
    root_prompt: str | None = Field(None, description="Optional root prompt visible to the root LM")
    backend: ClientBackend = Field("openai", description="LM provider backend")
    backend_kwargs: dict[str, Any] = Field(
        default_factory=dict,
        description="Backend-specific configuration (model_name, api_key, etc.)",
    )
    environment: EnvironmentType = Field("local", description="Execution environment type")
    environment_kwargs: dict[str, Any] = Field(
        default_factory=dict, description="Environment-specific configuration"
    )
    max_iterations: int = Field(30, ge=1, le=100, description="Maximum number of RLM iterations")
    max_depth: int = Field(
        1, ge=0, le=1, description="Maximum recursion depth (currently only 0 or 1)"
    )
    other_backends: list[ClientBackend] | None = Field(
        None, description="Additional backends for sub-calls"
    )
    other_backend_kwargs: list[dict[str, Any]] | None = Field(
        None, description="Configuration for additional backends"
    )
    custom_system_prompt: str | None = Field(
        None, description="Custom system prompt to override the default"
    )
    verbose: bool = Field(False, description="Enable verbose console output")
    enable_logging: bool = Field(True, description="Whether to save logs to file")


class RunResponse(BaseModel):
    """Response model for RLM completion result."""

    success: bool = Field(..., description="Whether the completion succeeded")
    response: str | None = Field(None, description="Final answer from RLM")
    root_model: str | None = Field(None, description="Model name used")
    execution_time: float | None = Field(None, description="Total execution time in seconds")
    usage_summary: dict[str, Any] | None = Field(None, description="Token usage summary per model")
    verbose_output: str | None = Field(None, description="Captured verbose console output")
    error: str | None = Field(None, description="Error message if completion failed")


class StreamEvent(BaseModel):
    """Base model for streaming events."""

    event: str = Field(..., description="Event type: metadata, iteration, complete, error")
    data: dict[str, Any] = Field(..., description="Event data payload")
20 changes: 20 additions & 0 deletions playground/run.py
@@ -0,0 +1,20 @@
"""CLI entry point for running the RLM playground FastAPI server."""

import sys
from pathlib import Path

import uvicorn

# Add project root to Python path
project_root = Path(__file__).parent.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))


if __name__ == "__main__":
    uvicorn.run(
        "playground.server:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )
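
Since this entry point just forwards to uvicorn, launching the server with the uvicorn CLI from the project root should be equivalent (a sketch, assuming the same defaults as above):

```bash
uvicorn playground.server:app --host 0.0.0.0 --port 8000 --reload
```
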
256 changes: 256 additions & 0 deletions playground/server.py
@@ -0,0 +1,256 @@
"""FastAPI server for RLM playground."""

import asyncio
import io
import json
import os
import sys
from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse

from playground.models import RunRequest, RunResponse
from rlm import RLM
from rlm.logger import RLMLogger

# Load environment variables
load_dotenv()

# Thread pool for running RLM in the background
executor = ThreadPoolExecutor(max_workers=10)

# Create FastAPI app
app = FastAPI(
    title="RLM Playground API",
    description="API for running RLM completions via web interface",
    version="0.1.0",
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Next.js dev server
        "http://127.0.0.1:3000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def root():
"""Health check endpoint."""
return {"status": "ok", "message": "RLM Playground API"}


def serialize_result(result):
"""Serialize RLMChatCompletion to a dict."""
response_text = result.response
if isinstance(response_text, tuple):
answer_type, answer_content = response_text
if answer_type == "FINAL":
response_text = answer_content.strip().strip('"').strip("'")
elif answer_type == "FINAL_VAR":
variable_name = answer_content.strip().strip('"').strip("'")
response_text = f"[FINAL_VAR: {variable_name}]"
else:
response_text = (
answer_content if isinstance(answer_content, str) else str(answer_content)
)

return {
"success": True,
"response": str(response_text) if response_text else None,
"root_model": result.root_model,
"execution_time": result.execution_time,
"usage_summary": result.usage_summary.to_dict(),
"error": None,
}


@app.post("/api/run/stream")
async def run_rlm_stream(request_body: RunRequest, request: Request):
"""Stream RLM execution via Server-Sent Events."""

async def event_generator():
queue: asyncio.Queue = asyncio.Queue()
loop = asyncio.get_event_loop()

def on_log_callback(entry: dict):
loop.call_soon_threadsafe(queue.put_nowait, entry)

def run_rlm_sync():
try:
# Setup logger
log_dir = os.getenv("RLM_LOG_DIR", "./logs")
logger = RLMLogger(log_dir=log_dir, on_log=on_log_callback)

# Create RLM instance
rlm = RLM(
backend=request_body.backend, # type: ignore
backend_kwargs=request_body.backend_kwargs or {},
environment=request_body.environment, # type: ignore
environment_kwargs=request_body.environment_kwargs or {},
max_depth=request_body.max_depth,
max_iterations=request_body.max_iterations,
other_backends=request_body.other_backends, # type: ignore
other_backend_kwargs=request_body.other_backend_kwargs,
custom_system_prompt=request_body.custom_system_prompt,
logger=logger,
verbose=False, # Verbose output is messy in streaming
)

# Run completion
result = rlm.completion(
prompt=request_body.prompt,
root_prompt=request_body.root_prompt,
)

# Signal completion
loop.call_soon_threadsafe(queue.put_nowait, {"type": "complete", "result": result})
except Exception as e:
loop.call_soon_threadsafe(queue.put_nowait, {"type": "error", "error": str(e)})

# Start RLM in background thread
_ = executor.submit(run_rlm_sync)

try:
while True:
if await request.is_disconnected():
break

try:
# Wait for an event with timeout to check for disconnect
entry = await asyncio.wait_for(queue.get(), timeout=1.0)
                except asyncio.TimeoutError:
                    continue

                if entry["type"] == "metadata":
                    yield f"event: metadata\ndata: {json.dumps(entry)}\n\n"
                elif entry["type"] == "iteration":
                    yield f"event: iteration\ndata: {json.dumps(entry)}\n\n"
                elif entry["type"] == "complete":
                    yield f"event: complete\ndata: {json.dumps(serialize_result(entry['result']))}\n\n"
                    break
                elif entry["type"] == "error":
                    yield f"event: error\ndata: {json.dumps({'error': entry['error']})}\n\n"
                    break
        finally:
            # We don't explicitly cancel the thread as it's not easily possible in Python,
            # but we stop yielding and let it finish or hit its own timeout.
            pass

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )


@app.post("/api/run", response_model=RunResponse)
def run_rlm(request: RunRequest) -> RunResponse:
"""
Run an RLM completion with the provided configuration.

This endpoint creates an RLM instance, runs a completion, and returns the result.
"""
try:
# Setup logger if enabled
logger = None
if request.enable_logging:
log_dir = os.getenv("RLM_LOG_DIR", "./logs")
logger = RLMLogger(log_dir=log_dir)

# Capture verbose output if enabled
verbose_output = None
if request.verbose:
stdout_capture = io.StringIO()
stderr_capture = io.StringIO()
old_stdout = sys.stdout
old_stderr = sys.stderr

try:
if request.verbose:
sys.stdout = stdout_capture
sys.stderr = stderr_capture

# Create RLM instance
rlm = RLM(
backend=request.backend,
backend_kwargs=request.backend_kwargs or {},
environment=request.environment,
environment_kwargs=request.environment_kwargs or {},
max_depth=request.max_depth,
max_iterations=request.max_iterations,
other_backends=request.other_backends,
other_backend_kwargs=request.other_backend_kwargs,
custom_system_prompt=request.custom_system_prompt,
logger=logger,
verbose=request.verbose,
)

# Run completion
result = rlm.completion(
prompt=request.prompt,
root_prompt=request.root_prompt,
)
finally:
# Restore stdout/stderr and capture output
if request.verbose:
sys.stdout = old_stdout
sys.stderr = old_stderr
stdout_text = stdout_capture.getvalue()
stderr_text = stderr_capture.getvalue()
verbose_output = stdout_text + stderr_text if stderr_text else stdout_text

# Extract response - handle both string and tuple (type, content) formats
response_text = result.response
if isinstance(response_text, tuple):
# If it's a tuple from find_final_answer, extract the content
# Format: (type, content) where type is "FINAL" or "FINAL_VAR"
answer_type, answer_content = response_text
if answer_type == "FINAL":
# For FINAL, use the content directly
response_text = answer_content.strip().strip('"').strip("'")
elif answer_type == "FINAL_VAR":
# For FINAL_VAR, the content is the variable name
# We can't look it up here since we don't have access to the environment
# So we'll return a message indicating the variable name
variable_name = answer_content.strip().strip('"').strip("'")
response_text = f"[FINAL_VAR: {variable_name}]"
else:
# Fallback: just use the content
response_text = (
answer_content if isinstance(answer_content, str) else str(answer_content)
)

# Convert result to response
return RunResponse(
success=True,
response=str(response_text) if response_text else None,
root_model=result.root_model,
execution_time=result.execution_time,
usage_summary=result.usage_summary.to_dict(),
verbose_output=verbose_output,
error=None,
)

except Exception as e:
# Return error response
return RunResponse(
success=False,
response=None,
root_model=None,
execution_time=None,
usage_summary=None,
verbose_output=None,
error=str(e),
)
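
For reference, the streaming endpoint can be exercised from a terminal as well; `curl -N` disables buffering so the Server-Sent Events (`metadata`, `iteration`, `complete`, `error`) print as they are emitted. The prompt and backend values below are only placeholders:

```bash
curl -N -X POST http://localhost:8000/api/run/stream \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Count the words in this sentence.", "backend": "openai", "max_iterations": 5}'
```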