2 changes: 2 additions & 0 deletions .gitignore
@@ -232,3 +232,5 @@ cache/
.Trashes
ehthumbs.db
Thumbs.db

/_submitit_logs/
32 changes: 32 additions & 0 deletions download_model.py
@@ -0,0 +1,32 @@
"""
Download model from Hub to a local path.

Usage:
uv run download_model.py \
--repo_name Qwen/Qwen3-0.6B \
--local_path /model-weights/Qwen3-0.6B

uv run download_model.py \
--repo_name Qwen/Qwen3-4B-Thinking-2507 \
--local_path /model-weights/Qwen3-4B-Thinking-2507

uv run download_model.py \
--repo_name Qwen/Qwen3-1.7B \
--local_path /model-weights/Qwen3-1.7B
"""

import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer


parser = argparse.ArgumentParser()
parser.add_argument("--repo_name", required=True)
parser.add_argument("--local_path", required=True)

if __name__ == "__main__":
    args = parser.parse_args()
    # Fetch from the Hub (or the local HF cache) and write a plain copy to local_path.
    AutoTokenizer.from_pretrained(args.repo_name).save_pretrained(args.local_path)
    AutoModelForCausalLM.from_pretrained(args.repo_name).save_pretrained(
        args.local_path
    )
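
As an aside, if you only need the repository files on disk (config, tokenizer, weight shards) without instantiating the model in memory, `huggingface_hub.snapshot_download` is a lighter-weight sketch of the same download; the paths below simply mirror the usage examples above:

```python
from huggingface_hub import snapshot_download


# Copies every file in the repo to local_dir without loading the model.
snapshot_download(
    repo_id="Qwen/Qwen3-0.6B",
    local_dir="/model-weights/Qwen3-0.6B",
)
```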
33 changes: 23 additions & 10 deletions pyproject.toml
@@ -13,17 +13,30 @@ description = "Starter templates that launch on Slurm via Hydra + Submitit"
authors = [{name = "Farnaz Kohankhaki", email = "[email protected]"}]
requires-python = "==3.12.*"
dependencies = [
"hydra-core>=1.3.2,<1.4",
"hydra-submitit-launcher>=1.2.0,<1.5",
"submitit>=1.5.0,<2.0",
"torch>=2.0.0,<2.6.0",
"transformers>=4.36.0,<4.52.0",
"datasets>=2.16.0,<3.6.0",
"accelerate>=0.26.0,<1.0.0",
"hydra-core>=1.3.2",
"hydra-submitit-launcher>=1.2.0",
"submitit>=1.5.0",
"torch>=2.0.0",
"transformers>=4.36.0",
"datasets>=2.16.0",
"accelerate>=0.26.0",
"pyarrow==16.1.0",
"pillow>=10.0.0,<11.0.0",
"pillow>=10.0.0",
"ruff>=0.1.0,<1.0.0",
"pre-commit>=3.0.0,<4.0.0",
"pydantic>=2.11.7",
"rich>=14.1.0",
"openai-agents>=0.2.11",
"basedpyright>=1.31.4",
"langfuse>=3.3.4",
"nest-asyncio>=1.6.0",
"pydantic-ai[logfire]>=1.0.6",
"vllm>=0.11.0",
]

[dependency-groups]
dev = [
"basedpyright>=1.31.4",
]

[tool.ruff]
@@ -38,7 +51,7 @@ docstring-code-format = true
[tool.ruff.lint]
select = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
fixable = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
ignore = ["B905","E501","D203","D213","PLR2004","PLR0913","COM812"]
ignore = ["B905","E501","D203","D213","PLC0415","PLR2004","PLR0913","COM812", "ERA001"]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402","F401","F403","F811"]
@@ -53,4 +66,4 @@ lines-after-imports = 2
convention = "numpy"

[tool.ruff.lint.pycodestyle]
max-doc-length = 88
max-doc-length = 88
22 changes: 22 additions & 0 deletions run_in_container.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# Script for running vLLM on Bon Echo
# Example:
# bash run_in_container.sh uv run vllm serve /model-weights/Qwen3-8B
source ~/.bashrc
source /opt/lmod/lmod/init/bash
export MODULEPATH=/opt/modulefiles:/pkgs/modulefiles:/pkgs/environment-modules

module load singularity-ce
export SINGULARITYENV_SLURM_CONF=/opt/slurm/etc/slurm.conf
export SINGULARITYENV_PATH="/opt/slurm/bin:$PATH"
export SINGULARITYENV_LD_LIBRARY_PATH="/opt/slurm/lib:/opt/slurm/lib64:/opt/munge/lib:/opt/munge/lib64:${LD_LIBRARY_PATH:-}"

singularity exec \
    --nv \
    --bind /model-weights:/model-weights \
    --bind /projects/llm:/projects/llm \
    --bind "$HOME":"$HOME" \
    --bind "$SCRATCH":"$SCRATCH" \
    /projects/llm/unsloth-vllm-trl-latest.sif \
    bash run_in_venv.sh "$@"
5 changes: 5 additions & 0 deletions run_in_venv.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Deactivate any inherited venv state, then activate the vLLM serving venv.
unset VIRTUAL_ENV
unset VIRTUAL_ENV_PROMPT
source "$SCRATCH/uv-venvs/vllm-serving/bin/activate"
"$@"
6 changes: 6 additions & 0 deletions starters/llm_fine_tuning/README.md
@@ -0,0 +1,6 @@
### LLM fine-tuning starters

This directory includes minimal examples for LLM fine-tuning with vec-tool:

- [text_classification](text_classification/): Fine-tunes a small Transformer on AG News using the Hugging Face Trainer, with Submitit-compatible checkpointing and resume (see the sketch after this list).
- [rlvr](rlvr/): RL fine-tuning where the reward is extrinsic and verifiable.
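
For context, Submitit-compatible checkpointing usually means implementing the `submitit.helpers.Checkpointable` protocol, so a preempted or timed-out job is resubmitted and resumes from its last saved state. A minimal sketch; the class, file name, and step logic are illustrative, not the starter's actual code:

```python
import os

import submitit


class TrainJob(submitit.helpers.Checkpointable):
    """On preemption or timeout, Submitit calls checkpoint() and resubmits this callable."""

    def __call__(self, ckpt_path: str = "step.txt", total_steps: int = 100) -> None:
        # Resume from the last recorded step if a previous run left a checkpoint.
        start = int(open(ckpt_path).read()) if os.path.exists(ckpt_path) else 0
        for step in range(start, total_steps):
            ...  # one training step goes here
            with open(ckpt_path, "w") as f:
                f.write(str(step + 1))


# executor = submitit.AutoExecutor(folder="submitit_logs")
# executor.submit(TrainJob())  # on resubmission, resumes from step.txt
```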
75 changes: 75 additions & 0 deletions starters/llm_fine_tuning/rlvr/README.md
@@ -0,0 +1,75 @@
# RL with Verifiable Reward (RLVR) Reference Implementations

This folder contains scripts for running RLVR algorithms on LLMs on the Vector cluster.

Supported algorithms:

- GRPO
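
For reference, GRPO's core idea is a group-relative advantage: sample $G$ rollouts per prompt, score each with the verifiable reward, and normalize within the group (this is the standard GRPO formulation, not anything specific to this repo):

$$
\hat{A}_i = \frac{r_i - \operatorname{mean}(r_1, \ldots, r_G)}{\operatorname{std}(r_1, \ldots, r_G)}
$$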

Features:

- Compatibility with Chat Completion models.
- LLM-as-a-judge for more involved reward verifications.
- Optimized for heterogeneous compute environments: run backpropagation on H100/A100 GPUs, and use L40S/A40/RTX8000 GPUs for rollouts and the LLM judge via dedicated SLURM jobs (see [submitit_vllm.py](submitit_vllm.py) for details).

Current limitations and TODO items:

- Single-GPU fine-tuning only.
- Backprop GPU does not participate in rollouts.
- Rollout GPUs might sit idle when all rollouts are done and only eval is pending.
- Verify support for function calling via Chat Completion.
- Integrate LangFuse datasets to track eval traces across steps.

## Setup

At a minimum, you will need uv and a working PyTorch installation. Running from within a container is possible, but make sure SLURM commands are available from within the container.

### Option A: running vLLM in a uv venv

Make sure vLLM runs in your environment.

- Create a vLLM uv venv following the [instructions from vLLM](https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#set-up-using-python).
- Make a copy of [run_in_venv.sh](/run_in_venv.sh) and point it to your newly created vLLM venv.
- Remember to `chmod a+x <your_script.sh>`.
- Make sure that in a new GPU job, `<your_script.sh> uv run vllm serve <EXAMPLE_MODEL_NAME>` launches the vLLM server (see the smoke test after Option B).

Examples: clusters running modern Linux distros for which pre-built vLLM wheels are available:

- Vector Institute Killarney
- Mila TamIA

### Option B: running vLLM via Singularity

Installing vLLM directly can be difficult on some clusters, e.g., ones running an unusual Linux distribution. As long as vLLM runs through Singularity in these environments, these reference implementations will work there as well. Steps:

- Make sure you can manually spin up Singularity and run `vllm serve` from within the GPU container.
- Make a copy of [run_in_container.sh](/run_in_container.sh) and point it to your Singularity image. Remember, all arguments are collected in `$@` and forwarded into the container.
- Remember to `chmod a+x <your_script.sh>`.
- Make sure that in a new GPU job, `<your_script.sh> uv run vllm serve <EXAMPLE_MODEL_NAME>` launches the vLLM server (see the smoke test below).

Examples:

- Vector Institute Bon Echo
- Compute Canada Narval
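
For either option, a quick way to confirm the server actually came up is to query its OpenAI-compatible endpoint. A minimal sketch, assuming the default port 8000 and the model path from the examples above:

```python
from openai import OpenAI


# vLLM serves an OpenAI-compatible API; any non-empty api_key is accepted by default.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
print([m.id for m in client.models.list().data])  # should include the served model path
```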

## Run Trainer

Refer to the top of [trainer.py](grpo/trainer.py) for example usage.

## Adapting to your workflow

Configurable options:

- GRPO hyperparameters
- dataset (the example uses `openai/gsm8k`; a reward sketch for its answer format follows this list)
- evaluation scheme and LLM judge setup
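
As a concrete reference for "verifiable reward", here is a minimal sketch of a rule-based reward for `openai/gsm8k`, whose reference answers end in `#### <answer>`; the function name and matching rule are illustrative, not the trainer's actual implementation:

```python
import re


def gsm8k_reward(completion: str, reference: str) -> float:
    """Return 1.0 if the completion's final number matches the gold answer, else 0.0."""
    gold = reference.split("####")[-1].strip().replace(",", "")
    # Treat the last number-like token in the completion as its final answer.
    numbers = re.findall(r"-?\d+(?:\.\d+)?", completion.replace(",", ""))
    if not numbers:
        return 0.0
    try:
        return float(float(numbers[-1]) == float(gold))
    except ValueError:
        return 0.0
```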

## Optional: Observability Integration

Set up LangFuse to track the output of your models as training proceeds.

```bash
export LANGFUSE_SECRET_KEY="sk-lf-..."
export LANGFUSE_PUBLIC_KEY="pk-lf-..."
export LANGFUSE_HOST="https://us.cloud.langfuse.com"
```
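
With the keys exported, a minimal sketch to confirm the credentials work, assuming the Langfuse Python SDK client reads them from the environment and exposes `auth_check()`:

```python
from langfuse import Langfuse


# Picks up LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY / LANGFUSE_HOST from the environment.
client = Langfuse()
print(client.auth_check())  # True when the keys and host are valid
```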
176 changes: 176 additions & 0 deletions starters/llm_fine_tuning/rlvr/agents_integration/examples.py
@@ -0,0 +1,176 @@
"""
OpenAI Agents SDK demo: function tool `get_weather` + minimal agent.

Setup (with astral-uv):
    uv venv && uv pip install openai-agents pydantic
    # Or, if you prefer pip: `pip install openai-agents pydantic`

Environment:
export OPENAI_API_KEY=... # Required for the default OpenAI client

Run:
    uv run python examples.py
"""

from __future__ import annotations

import hashlib
from datetime import date, datetime, timedelta, timezone
from typing import Literal

from agents import Agent, Runner, function_tool
from pydantic import BaseModel, Field


class WeatherReport(BaseModel):
"""Structured output for a weather report.

Attributes
----------
city:
Echo of the requested city (canonicalized).
unit:
"c" for Celsius or "f" for Fahrenheit.
temperature:
Air temperature in the requested unit.
feels_like:
Apparent temperature in the requested unit.
condition:
One of: "clear", "partly cloudy", "cloudy", "rain", "snow", "windy".
humidity:
Relative humidity percentage (0–100).
wind_kph:
Wind speed in kilometers per hour.
observation_time:
UTC timestamp when the reading was generated.
"""

city: str
unit: Literal["c", "f"]
temperature: float
feels_like: float
condition: Literal["clear", "partly cloudy", "cloudy", "rain", "snow", "windy"]
humidity: int = Field(ge=0, le=100)
wind_kph: float = Field(ge=0)
observation_time: datetime


@function_tool
def get_weather(
city: str,
unit: Literal["c", "f"] = "c",
when: Literal["now", "today", "tomorrow"] = "now",
) -> str:
"""Return a deterministic, mock weather report for demos.

The function is *offline* and *stable across runs* for a given `(city, date)`
so it's ideal for showcasing **function-tool** calls without network flakiness.

Args:
city:
Human-readable city name (e.g., "Vancouver").
unit:
Temperature unit: "c" for Celsius, "f" for Fahrenheit. Defaults to "c".
when:
Time window for the report: "now", "today", or "tomorrow". Defaults to "now".

Returns:
JSON string representing a `WeatherReport`.
"""
canonical = city.strip()

# City baselines (°C). Extend this mapping to taste.
baselines: dict[str, float] = {
"vancouver": 14.0,
"new york": 12.0,
"london": 11.0,
"singapore": 28.0,
"shanghai": 28.0,
"auckland": 20.0,
"tokyo": 17.0,
"paris": 13.0,
"san francisco": 16.0,
"berlin": 12.0,
"mexico city": 19.0,
}

key = canonical.lower()
base_c = baselines.get(key, 15.0)

# Reference date for deterministic seeding
today = date.today()
ref_date = today if when in ("now", "today") else today + timedelta(days=1)

    # Seeded pseudo-randomness derived from (city, date). hashlib keeps the seed
    # stable across interpreter runs; the builtin str hash() is randomized per process.
    seed = int.from_bytes(
        hashlib.sha256(f"{key}|{ref_date.isoformat()}".encode()).digest()[:8], "big"
    )

    def prand(a: float, b: float, salt: int) -> float:
        # Deterministic pseudo-random value in [a, b] (10 discrete steps).
        return a + ((seed ^ salt) % 10) / 9.0 * (b - a)

temp_c = base_c + prand(-4.0, 4.0, 0xA5A5) - 0.5
humidity = int(round(prand(40, 90, 0xB6B6)))
wind_kph = round(prand(0.0, 30.0, 0xC7C7), 1)

band = seed % 100
if band < 20:
condition = "clear"
elif band < 45:
condition = "partly cloudy"
elif band < 65:
condition = "cloudy"
elif band < 85:
condition = "rain"
elif band < 95:
condition = "windy"
else:
condition = "snow"

feels_c = temp_c - 0.1 * wind_kph + 0.02 * (humidity - 50)

def to_unit(tc: float, u: Literal["c", "f"]) -> float:
return round(tc if u == "c" else (tc * 9 / 5 + 32), 1)

report = WeatherReport(
city=canonical,
unit=unit,
temperature=to_unit(temp_c, unit),
feels_like=to_unit(feels_c, unit),
condition=condition, # type: ignore[arg-type]
humidity=humidity,
wind_kph=wind_kph,
observation_time=datetime.now(timezone.utc),
)

# Agents SDK tools should return a string (or something that stringifies cleanly).
return report.model_dump_json()


# --- Minimal agent wiring ----------------------------------------------------
weather_agent = Agent(
name="Weather Helper",
instructions=(
"You answer weather questions. When the user asks about weather, "
"call the `get_weather` tool. If it returns JSON, parse it and reply "
"concisely with temperature, feels-like, condition, and units."
),
tools=[get_weather], # register the function tool
)


def main() -> None:
"""Run a single demo turn with the agent and print the final output."""
# Example inputs that strongly encourage tool use
user_inputs: list[str] = [
"What's the weather in Vancouver today in celsius?",
"NYC now, in Fahrenheit — include feels-like and wind, please.",
]

for i, prompt in enumerate(user_inputs, start=1):
print(f"\n=== Demo turn {i} ===")
result = Runner.run_sync(weather_agent, prompt)
print(result.final_output)


if __name__ == "__main__":
main()