61 changes: 61 additions & 0 deletions playground/README.md
@@ -0,0 +1,61 @@
# RLM Playground

The RLM Playground provides a web interface to interactively run RLM completions, visualize recursive calls, and debug execution across different environments.

## Architecture

- **Backend**: FastAPI server (`playground/server.py`) that executes RLM logic.
- **Frontend**: Next.js visualizer (`visualizer/`) with a dedicated playground page.

## Getting Started

### 1. Prerequisites

Ensure you have installed the project dependencies with the `playground` extra:

```bash
uv pip install -e ".[playground]"
```

### 2. Environment Setup

The playground server uses a `.env` file for configuration. Create one in the root directory:

```bash
# API Keys for LLM providers
OPENAI_API_KEY=your_key_here
ANTHROPIC_API_KEY=your_key_here

# Optional: Directory for RLM logs
RLM_LOG_DIR=./logs
```

### 3. Running the Backend Server

Start the FastAPI server. By default, it runs on `http://localhost:8000`.

```bash
uv run playground/run.py
```

The server includes:
- **CORS Middleware**: Pre-configured to allow requests from the visualizer (port 3000).
- **Auto-reload**: Enabled for development.
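
Once it is running, a quick sanity check is the health endpoint at the root path (the expected response is shown as a comment and assumes the default host and port above):

```bash
curl http://localhost:8000/
# {"status": "ok", "message": "RLM Playground API"}
```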

### 4. Running the Visualizer Frontend

Navigate to the visualizer directory and start the Next.js development server:

```bash
cd visualizer
bun install
bun run dev
```

Open [http://localhost:3000/playground](http://localhost:3000/playground) in your browser.

## Features

- **Configuration**: Switch between LM backends (OpenAI, Anthropic, Portkey, OpenRouter, vLLM, LiteLLM) and execution environments (Local, Prime, Modal).
- **Iterative Execution**: Set `max_iterations` to cap how many steps the root model takes and `max_depth` to cap recursive sub-calls (currently 0 or 1).
- **Usage Tracking**: View real-time token usage and execution time for every model call.
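
## Calling the API Directly

The visualizer drives the API for you, but the backend can also be called directly. A minimal request to the synchronous `/api/run` endpoint looks roughly like this (the model name is only an illustrative value; any omitted field falls back to the defaults in `playground/models.py`):

```bash
curl -X POST http://localhost:8000/api/run \
  -H "Content-Type: application/json" \
  -d '{
    "prompt": "Summarize the plot of Hamlet in one sentence.",
    "backend": "openai",
    "backend_kwargs": {"model_name": "gpt-4o-mini"},
    "environment": "local",
    "max_iterations": 10,
    "max_depth": 1
  }'
```
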
Empty file added playground/__init__.py
Empty file.
58 changes: 58 additions & 0 deletions playground/models.py
@@ -0,0 +1,58 @@
"""Pydantic models for RLM playground API requests and responses."""

from typing import Any, Literal

from pydantic import BaseModel, Field

ClientBackend = Literal["openai", "portkey", "openrouter", "vllm", "litellm", "anthropic"]
EnvironmentType = Literal["local", "prime", "modal"]


class RunRequest(BaseModel):
    """Request model for running RLM completion."""

    prompt: str | dict[str, Any] = Field(..., description="Main prompt/context for the RLM")
    root_prompt: str | None = Field(None, description="Optional root prompt visible to the root LM")
    backend: ClientBackend = Field("openai", description="LM provider backend")
    backend_kwargs: dict[str, Any] = Field(
        default_factory=dict,
        description="Backend-specific configuration (model_name, api_key, etc.)",
    )
    environment: EnvironmentType = Field("local", description="Execution environment type")
    environment_kwargs: dict[str, Any] = Field(
        default_factory=dict, description="Environment-specific configuration"
    )
    max_iterations: int = Field(30, ge=1, le=100, description="Maximum number of RLM iterations")
    max_depth: int = Field(
        1, ge=0, le=1, description="Maximum recursion depth (currently only 0 or 1)"
    )
    other_backends: list[ClientBackend] | None = Field(
        None, description="Additional backends for sub-calls"
    )
    other_backend_kwargs: list[dict[str, Any]] | None = Field(
        None, description="Configuration for additional backends"
    )
    custom_system_prompt: str | None = Field(
        None, description="Custom system prompt to override the default"
    )
    verbose: bool = Field(False, description="Enable verbose console output")
    enable_logging: bool = Field(True, description="Whether to save logs to file")


class RunResponse(BaseModel):
    """Response model for RLM completion result."""

    success: bool = Field(..., description="Whether the completion succeeded")
    response: str | None = Field(None, description="Final answer from RLM")
    root_model: str | None = Field(None, description="Model name used")
    execution_time: float | None = Field(None, description="Total execution time in seconds")
    usage_summary: dict[str, Any] | None = Field(None, description="Token usage summary per model")
    verbose_output: str | None = Field(None, description="Captured verbose console output")
    error: str | None = Field(None, description="Error message if completion failed")


class StreamEvent(BaseModel):
    """Base model for streaming events."""

    event: str = Field(..., description="Event type: metadata, iteration, complete, error")
    data: dict[str, Any] = Field(..., description="Event data payload")
20 changes: 20 additions & 0 deletions playground/run.py
@@ -0,0 +1,20 @@
"""CLI entry point for running the RLM playground FastAPI server."""

import sys
from pathlib import Path

import uvicorn

# Add project root to Python path
project_root = Path(__file__).parent.parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))


if __name__ == "__main__":
    uvicorn.run(
        "playground.server:app",
        host="0.0.0.0",
        port=8000,
        reload=True,
    )
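
Since this entry point just forwards to uvicorn, launching the server with the uvicorn CLI from the project root should be equivalent (a sketch, assuming the same defaults as above):

```bash
uvicorn playground.server:app --host 0.0.0.0 --port 8000 --reload
```
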
256 changes: 256 additions & 0 deletions playground/server.py
@@ -0,0 +1,256 @@
"""FastAPI server for RLM playground."""

import asyncio
import io
import json
import os
import sys
from concurrent.futures import ThreadPoolExecutor

from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse

from playground.models import RunRequest, RunResponse
from rlm import RLM
from rlm.logger import RLMLogger

# Load environment variables
load_dotenv()

# Thread pool for running RLM in the background
executor = ThreadPoolExecutor(max_workers=10)

# Create FastAPI app
app = FastAPI(
    title="RLM Playground API",
    description="API for running RLM completions via web interface",
    version="0.1.0",
)

# Configure CORS
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",  # Next.js dev server
        "http://127.0.0.1:3000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def root():
"""Health check endpoint."""
return {"status": "ok", "message": "RLM Playground API"}


def serialize_result(result):
"""Serialize RLMChatCompletion to a dict."""
response_text = result.response
if isinstance(response_text, tuple):
answer_type, answer_content = response_text
if answer_type == "FINAL":
response_text = answer_content.strip().strip('"').strip("'")
elif answer_type == "FINAL_VAR":
variable_name = answer_content.strip().strip('"').strip("'")
response_text = f"[FINAL_VAR: {variable_name}]"
else:
response_text = (
answer_content if isinstance(answer_content, str) else str(answer_content)
)

return {
"success": True,
"response": str(response_text) if response_text else None,
"root_model": result.root_model,
"execution_time": result.execution_time,
"usage_summary": result.usage_summary.to_dict(),
"error": None,
}


@app.post("/api/run/stream")
async def run_rlm_stream(request_body: RunRequest, request: Request):
"""Stream RLM execution via Server-Sent Events."""

async def event_generator():
queue: asyncio.Queue = asyncio.Queue()
loop = asyncio.get_event_loop()

def on_log_callback(entry: dict):
loop.call_soon_threadsafe(queue.put_nowait, entry)

def run_rlm_sync():
try:
# Setup logger
log_dir = os.getenv("RLM_LOG_DIR", "./logs")
logger = RLMLogger(log_dir=log_dir, on_log=on_log_callback)

# Create RLM instance
rlm = RLM(
backend=request_body.backend, # type: ignore
backend_kwargs=request_body.backend_kwargs or {},
environment=request_body.environment, # type: ignore
environment_kwargs=request_body.environment_kwargs or {},
max_depth=request_body.max_depth,
max_iterations=request_body.max_iterations,
other_backends=request_body.other_backends, # type: ignore
other_backend_kwargs=request_body.other_backend_kwargs,
custom_system_prompt=request_body.custom_system_prompt,
logger=logger,
verbose=False, # Verbose output is messy in streaming
)

# Run completion
result = rlm.completion(
prompt=request_body.prompt,
root_prompt=request_body.root_prompt,
)

# Signal completion
loop.call_soon_threadsafe(queue.put_nowait, {"type": "complete", "result": result})
except Exception as e:
loop.call_soon_threadsafe(queue.put_nowait, {"type": "error", "error": str(e)})

# Start RLM in background thread
_ = executor.submit(run_rlm_sync)

try:
while True:
if await request.is_disconnected():
break

try:
# Wait for an event with timeout to check for disconnect
entry = await asyncio.wait_for(queue.get(), timeout=1.0)
                except asyncio.TimeoutError:
                    continue

                if entry["type"] == "metadata":
                    yield f"event: metadata\ndata: {json.dumps(entry)}\n\n"
                elif entry["type"] == "iteration":
                    yield f"event: iteration\ndata: {json.dumps(entry)}\n\n"
                elif entry["type"] == "complete":
                    yield f"event: complete\ndata: {json.dumps(serialize_result(entry['result']))}\n\n"
                    break
                elif entry["type"] == "error":
                    yield f"event: error\ndata: {json.dumps({'error': entry['error']})}\n\n"
                    break
        finally:
            # We don't explicitly cancel the thread as it's not easily possible in Python,
            # but we stop yielding and let it finish or hit its own timeout.
            pass

    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
        },
    )


@app.post("/api/run", response_model=RunResponse)
def run_rlm(request: RunRequest) -> RunResponse:
"""
Run an RLM completion with the provided configuration.

This endpoint creates an RLM instance, runs a completion, and returns the result.
"""
try:
# Setup logger if enabled
logger = None
if request.enable_logging:
log_dir = os.getenv("RLM_LOG_DIR", "./logs")
logger = RLMLogger(log_dir=log_dir)

# Capture verbose output if enabled
verbose_output = None
if request.verbose:
stdout_capture = io.StringIO()
stderr_capture = io.StringIO()
old_stdout = sys.stdout
old_stderr = sys.stderr

try:
if request.verbose:
sys.stdout = stdout_capture
sys.stderr = stderr_capture

# Create RLM instance
rlm = RLM(
backend=request.backend,
backend_kwargs=request.backend_kwargs or {},
environment=request.environment,
environment_kwargs=request.environment_kwargs or {},
max_depth=request.max_depth,
max_iterations=request.max_iterations,
other_backends=request.other_backends,
other_backend_kwargs=request.other_backend_kwargs,
custom_system_prompt=request.custom_system_prompt,
logger=logger,
verbose=request.verbose,
)

# Run completion
result = rlm.completion(
prompt=request.prompt,
root_prompt=request.root_prompt,
)
finally:
# Restore stdout/stderr and capture output
if request.verbose:
sys.stdout = old_stdout
sys.stderr = old_stderr
stdout_text = stdout_capture.getvalue()
stderr_text = stderr_capture.getvalue()
verbose_output = stdout_text + stderr_text if stderr_text else stdout_text

# Extract response - handle both string and tuple (type, content) formats
response_text = result.response
if isinstance(response_text, tuple):
# If it's a tuple from find_final_answer, extract the content
# Format: (type, content) where type is "FINAL" or "FINAL_VAR"
answer_type, answer_content = response_text
if answer_type == "FINAL":
# For FINAL, use the content directly
response_text = answer_content.strip().strip('"').strip("'")
elif answer_type == "FINAL_VAR":
# For FINAL_VAR, the content is the variable name
# We can't look it up here since we don't have access to the environment
# So we'll return a message indicating the variable name
variable_name = answer_content.strip().strip('"').strip("'")
response_text = f"[FINAL_VAR: {variable_name}]"
else:
# Fallback: just use the content
response_text = (
answer_content if isinstance(answer_content, str) else str(answer_content)
)

# Convert result to response
return RunResponse(
success=True,
response=str(response_text) if response_text else None,
root_model=result.root_model,
execution_time=result.execution_time,
usage_summary=result.usage_summary.to_dict(),
verbose_output=verbose_output,
error=None,
)

except Exception as e:
# Return error response
return RunResponse(
success=False,
response=None,
root_model=None,
execution_time=None,
usage_summary=None,
verbose_output=None,
error=str(e),
)
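
For reference, the streaming endpoint can be exercised from a terminal as well; `curl -N` disables buffering so the Server-Sent Events (`metadata`, `iteration`, `complete`, `error`) print as they are emitted. The prompt and backend values below are only placeholders:

```bash
curl -N -X POST http://localhost:8000/api/run/stream \
  -H "Content-Type: application/json" \
  -d '{"prompt": "Count the words in this sentence.", "backend": "openai", "max_iterations": 5}'
```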