- Framework Agnostic Integration: Register any agent framework or implementation with a simple function call
- Massive Scalability: Schedule tasks across large-scale compute resources (CPUs, GPUs, clusters)
- Seamless RL Integration: Serve as an independent agent rollout layer for any RL framework, automatically tracing all agent rollout trajectories.
- Agentic Reinforcement Learning Rollouts
- Agent Trajectory Generation
- Agent Evaluation
You need to write your agent rollout function following this pattern:
import ray
@ray.remote(num_cpus=0.1)
async def your_agent_rollout_function(task: dict, endpoint: str, model: str):
    """Run one agent rollout for a single task.

    Your agent implementation - can be ANY framework!

    Args:
        task: Dictionary containing the input data for your agent.
        endpoint: LLM endpoint URL.
        model: Model name to use.

    Returns:
        Result object with reward/score information.
    """
    llm = ChatOpenAI(
        model=model,
        openai_api_base=endpoint,
        # NOTE(review): the original snippet referenced an undefined name
        # `task_id` here (a NameError at runtime). Presumably the rollout's
        # id is carried in the task dict so the proxy can attribute traced
        # LLM calls to this rollout -- confirm the expected key against the
        # orchestrator's task schema.
        openai_api_key=task["task_id"],
    )
    agent = get_agent(llm)  # REQUIRED: build your agent around the traced LLM
    result = agent.invoke(task)
    return result
- Initialize the AgentOrchestrator
from agentarium import AgentOrchestrator
from your_rl_framework import YourTrainer
from your_agent import your_agent_rollout_function
class YourAgentariumTrainer(YourTrainer):
    """Trainer subclass that wires the Agentarium orchestrator into an
    existing RL trainer."""

    def __init__(self, config, **kwargs):
        super().__init__(config, **kwargs)
        # 1. Initialize the orchestrator.
        #    NOTE(review): the original snippet was missing the comma after
        #    `server_addresses=server_addresses` and passed a bare `...`
        #    positionally after keyword arguments -- both SyntaxErrors.
        #    Fill in any additional orchestrator options as keyword args.
        self.agentarium_orchestrator = AgentOrchestrator(
            server_addresses=server_addresses,
            proxy_port=port,
            ray_config=ray_config,
            # ... any additional orchestrator options go here
        )
        # 2. Register your agent types so the orchestrator can dispatch
        #    tasks to the right rollout function / endpoint / model.
        self.agentarium_orchestrator.register_agent_type(
            agent_name,
            your_agent_rollout_function,
            endpoint,
            model,
        )
- Training Loop Integration
def training_step(self, batch_dict: dict) -> dict:
    """Integrate Agentarium into your RL training loop.

    Args:
        batch_dict: Your training batch.

    Returns:
        Whatever your RL update produces from the rollout trajectories.
    """
    orchestrator = self.agentarium_orchestrator

    # Stage the batch, then fan all agent rollouts out in parallel across
    # the Ray cluster and block until every rollout has finished.
    orchestrator.initialize_batch_processing(
        data=batch_dict,  # Your training batch
        is_train=True     # Training mode
    )
    orchestrator.wait_for_batch_completion()

    # Collect the processed, reward-annotated training data.
    trajectories, rollout_metrics = orchestrator.generate_training_batch(
        max_prompt_length=self.config.max_prompt_length,
        max_response_length=self.config.max_response_length,
        device=your_device
    )

    # Clear per-batch state before the next iteration, then hand the data
    # to your RL algorithm (PPO, DPO, etc.).
    orchestrator.reset_orchestrator_state()
    return self.your_rl_update(trajectories, rollout_metrics)
def validation_step(self, val_batch: dict) -> dict:
    """Validation using Agentarium.

    Same pattern as the training step, but with is_train=False.
    """
    orchestrator = self.agentarium_orchestrator

    # Stage the validation batch and run every rollout to completion.
    orchestrator.initialize_batch_processing(
        data=val_batch,
        is_train=False  # Validation mode
    )
    orchestrator.wait_for_batch_completion()

    # Gather the validation metrics, then clear state for the next call.
    metrics = orchestrator.collect_validation_metrics()
    orchestrator.reset_orchestrator_state()
    return metrics
# --- Option A: reuse the verl Docker container ---
cd agentarium
# Reuse verl container
docker create --gpus all --net=host --shm-size="10g" --cap-add=SYS_ADMIN \
-v .:/workspace/agentarium \
--name agentarium \
verlai/verl:app-verl0.5-vllm0.9.1-mcore0.12.2-te2.2 \
sleep infinity
# Start the container
docker start agentarium
docker exec -it agentarium bash
# Install dependencies (inside the container)
python -m pip install --upgrade pip
pip install -e .
pip install verl==0.5.0
# --- Option B: build a conda environment from scratch ---
# Create conda environment
conda create -n agentarium python=3.10 -y
conda activate agentarium
# CUDA 12.8 wheels; adjust the --index-url for your CUDA version.
pip install torch==2.7.0 torchvision==0.22.0 torchaudio==2.7.0 --index-url https://download.pytorch.org/whl/cu128
# flash-attn must build against the torch installed just above.
pip install flash-attn --no-build-isolation
# NOTE(review): the container image above pins vllm 0.9.1 while this
# installs 0.9.2 -- confirm which version is intended.
pip install vllm==0.9.2
pip install verl==0.5.0
pip install -e .
- Train a calculator agent on GSM8k math task.
# Navigate to an example directory
cd examples/custom_calc_x
# Download training data
# NOTE(review): large Google Drive files often need a confirm token or a
# tool like gdown -- verify this direct wget actually retrieves the zip.
wget https://drive.google.com/uc?id=1FQMyKLLd6hP9dw9rfZn1EZOWNvKaDsqw -O calc-x-data.zip
unzip calc-x-data.zip -d data
# Install Agent dependencies
pip install -r requirements_calc.txt
# Start training
bash train_agentarium_ray.sh
- Reward vs. Global Step
- Training Results
# Plot training metrics from the logged CSV produced during training.
cd agentarium/visualization
pip install -r requirements_viz.txt
python run_metrics_viz.py ../../examples/mcp_calc_agent/logs/custom_calc_x_ray_metrics.csv
- Training Progress using Ray Dashboard
http://localhost:8265/
This work is inspired by Microsoft's Agent Lightning project, which pioneered the concept of training AI agents via reinforcement learning with minimal code changes.