2 changes: 2 additions & 0 deletions .gitignore
@@ -232,3 +232,5 @@ cache/
.Trashes
ehthumbs.db
Thumbs.db

/_submitit_logs/
32 changes: 32 additions & 0 deletions download_model.py
@@ -0,0 +1,32 @@
"""
Download model from Hub to a local path.

Usage:
uv run download_model.py \
--repo_name Qwen/Qwen3-0.6B \
--local_path /model-weights/Qwen3-0.6B

uv run download_model.py \
--repo_name Qwen/Qwen3-4B-Thinking-2507 \
--local_path /model-weights/Qwen3-4B-Thinking-2507

uv run download_model.py \
--repo_name Qwen/Qwen3-1.7B \
--local_path /model-weights/Qwen3-1.7B
"""

import argparse

from transformers import AutoModelForCausalLM, AutoTokenizer


parser = argparse.ArgumentParser()
parser.add_argument("--repo_name", required=True)
parser.add_argument("--local_path", required=True)

if __name__ == "__main__":
    args = parser.parse_args()
    # Fetch from the Hub (or the local HF cache) and write a plain copy to local_path.
    AutoTokenizer.from_pretrained(args.repo_name).save_pretrained(args.local_path)
    AutoModelForCausalLM.from_pretrained(args.repo_name).save_pretrained(
        args.local_path
    )
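
As an aside, if you only need the repository files on disk (config, tokenizer, weight shards) without instantiating the model in memory, `huggingface_hub.snapshot_download` is a lighter-weight sketch of the same download; the paths below simply mirror the usage examples above:

```python
from huggingface_hub import snapshot_download


# Copies every file in the repo to local_dir without loading the model.
snapshot_download(
    repo_id="Qwen/Qwen3-0.6B",
    local_dir="/model-weights/Qwen3-0.6B",
)
```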
33 changes: 23 additions & 10 deletions pyproject.toml
@@ -13,17 +13,30 @@ description = "Starter templates that launch on Slurm via Hydra + Submitit"
authors = [{name = "Farnaz Kohankhaki", email = "[email protected]"}]
requires-python = "==3.12.*"
dependencies = [
"hydra-core>=1.3.2,<1.4",
"hydra-submitit-launcher>=1.2.0,<1.5",
"submitit>=1.5.0,<2.0",
"torch>=2.0.0,<2.6.0",
"transformers>=4.36.0,<4.52.0",
"datasets>=2.16.0,<3.6.0",
"accelerate>=0.26.0,<1.0.0",
"hydra-core>=1.3.2",
"hydra-submitit-launcher>=1.2.0",
"submitit>=1.5.0",
"torch>=2.0.0",
"transformers>=4.36.0",
"datasets>=2.16.0",
"accelerate>=0.26.0",
"pyarrow==16.1.0",
"pillow>=10.0.0,<11.0.0",
"pillow>=10.0.0",
"ruff>=0.1.0,<1.0.0",
"pre-commit>=3.0.0,<4.0.0",
"pydantic>=2.11.7",
"rich>=14.1.0",
"openai-agents>=0.2.11",
"basedpyright>=1.31.4",
"langfuse>=3.3.4",
"nest-asyncio>=1.6.0",
"pydantic-ai[logfire]>=1.0.6",
"vllm>=0.11.0",
]

[dependency-groups]
dev = [
"basedpyright>=1.31.4",
]

[tool.ruff]
@@ -38,7 +51,7 @@ docstring-code-format = true
[tool.ruff.lint]
select = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
fixable = ["A","B","COM","C4","RET","SIM","ICN","Q","RSE","D","E","F","I","W","N","ERA","PL"]
ignore = ["B905","E501","D203","D213","PLR2004","PLR0913","COM812"]
ignore = ["B905","E501","D203","D213","PLC0415","PLR2004","PLR0913","COM812", "ERA001"]

[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["E402","F401","F403","F811"]
@@ -53,4 +66,4 @@ lines-after-imports = 2
convention = "numpy"

[tool.ruff.lint.pycodestyle]
max-doc-length = 88
max-doc-length = 88
22 changes: 22 additions & 0 deletions run_in_container.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# Script for running vLLM on Bon Echo
# Example:
# bash run_in_container.sh uv run vllm serve /model-weights/Qwen3-8B
source ~/.bashrc
source /opt/lmod/lmod/init/bash
export MODULEPATH=/opt/modulefiles:/pkgs/modulefiles:/pkgs/environment-modules

module load singularity-ce
export SINGULARITYENV_SLURM_CONF=/opt/slurm/etc/slurm.conf
export SINGULARITYENV_PATH="/opt/slurm/bin:$PATH"
export SINGULARITYENV_LD_LIBRARY_PATH="/opt/slurm/lib:/opt/slurm/lib64:/opt/munge/lib:/opt/munge/lib64:${LD_LIBRARY_PATH:-}"

singularity exec \
    --nv \
    --bind /model-weights:/model-weights \
    --bind /projects/llm:/projects/llm \
    --bind "$HOME":"$HOME" \
    --bind "$SCRATCH":"$SCRATCH" \
    /projects/llm/unsloth-vllm-trl-latest.sif \
    bash run_in_venv.sh "$@"
5 changes: 5 additions & 0 deletions run_in_venv.sh
@@ -0,0 +1,5 @@
#!/bin/bash
# Deactivate any inherited venv state, then activate the vLLM serving venv.
unset VIRTUAL_ENV
unset VIRTUAL_ENV_PROMPT
source "$SCRATCH/uv-venvs/vllm-serving/bin/activate"
"$@"
6 changes: 6 additions & 0 deletions starters/llm_fine_tuning/README.md
@@ -0,0 +1,6 @@
### LLM fine-tuning starters

This directory includes minimal examples for LLM fine-tuning with vec-tool:

- [text_classification](text_classification/): Fine-tunes a small Transformer on AG News using the Hugging Face Trainer, with Submitit-compatible checkpointing and resume (see the sketch after this list).
- [rlvr](rlvr/): RL fine-tuning where the reward is extrinsic and verifiable.
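
For context, Submitit-compatible checkpointing usually means implementing the `submitit.helpers.Checkpointable` protocol, so a preempted or timed-out job is resubmitted and resumes from its last saved state. A minimal sketch; the class, file name, and step logic are illustrative, not the starter's actual code:

```python
import os

import submitit


class TrainJob(submitit.helpers.Checkpointable):
    """On preemption or timeout, Submitit calls checkpoint() and resubmits this callable."""

    def __call__(self, ckpt_path: str = "step.txt", total_steps: int = 100) -> None:
        # Resume from the last recorded step if a previous run left a checkpoint.
        start = int(open(ckpt_path).read()) if os.path.exists(ckpt_path) else 0
        for step in range(start, total_steps):
            ...  # one training step goes here
            with open(ckpt_path, "w") as f:
                f.write(str(step + 1))


# executor = submitit.AutoExecutor(folder="submitit_logs")
# executor.submit(TrainJob())  # on resubmission, resumes from step.txt
```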
75 changes: 75 additions & 0 deletions starters/llm_fine_tuning/rlvr/README.md
@@ -0,0 +1,75 @@
# RL with Verifiable Reward (RLVR) Reference Implementations

This folder contains scripts for running RLVR algorithms on LLMs on the Vector cluster.

Supported algorithms:

- GRPO
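
For reference, GRPO's core idea is a group-relative advantage: sample $G$ rollouts per prompt, score each with the verifiable reward, and normalize within the group (this is the standard GRPO formulation, not anything specific to this repo):

$$
\hat{A}_i = \frac{r_i - \operatorname{mean}(r_1, \ldots, r_G)}{\operatorname{std}(r_1, \ldots, r_G)}
$$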

Features:

- Compatibility with Chat Completion models.
- LLM-as-a-judge for more involved reward verifications.
- Optimized for heterogeneous compute environments: run backpropagation on H100/A100 GPUs, and use L40S/A40/RTX8000 GPUs for rollouts and the LLM judge via dedicated SLURM jobs (see [submitit_vllm.py](submitit_vllm.py) for details).

Current limitations and TODO items:

- Single-GPU fine-tuning only.
- Backprop GPU does not participate in rollouts.
- Rollout GPUs might sit idle when all rollouts are done and only eval is pending.
- Verify support for function calling via Chat Completion.
- Integrate LangFuse datasets to track eval traces across steps.

## Setup

At a minimum, you will need uv and a working PyTorch installation. Running from within a container is possible, but make sure SLURM commands are available from within the container.

### Option A: running vLLM in a uv venv

Make sure vLLM runs in your environment.

- Create a vLLM uv venv following the [instructions from vLLM](https://docs.vllm.ai/en/stable/getting_started/installation/gpu.html#set-up-using-python).
- Make a copy of [run_in_venv.sh](/run_in_venv.sh) and point it to your newly created vLLM venv.
- Remember to `chmod a+x <your_script.sh>`.
- Make sure that in a new GPU job, `<your_script.sh> uv run vllm serve <EXAMPLE_MODEL_NAME>` launches the vLLM server (see the smoke test after Option B).

Examples: clusters running modern Linux distros for which pre-built vLLM wheels are available:

- Vector Institute Killarney
- Mila TamIA

### Option B: running vLLM via Singularity

Installing vLLM directly can be difficult on some clusters, e.g., ones running an unusual Linux distribution. As long as vLLM runs through Singularity in these environments, these reference implementations will work there as well. Steps:

- Make sure you can manually spin up Singularity and run `vllm serve` from within the GPU container.
- Make a copy of [run_in_container.sh](/run_in_container.sh) and point it to your Singularity image. Remember, all arguments are collected in `$@` and forwarded into the container.
- Remember to `chmod a+x <your_script.sh>`.
- Make sure that in a new GPU job, `<your_script.sh> uv run vllm serve <EXAMPLE_MODEL_NAME>` launches the vLLM server (see the smoke test below).

Examples:

- Vector Institute Bon Echo
- Compute Canada Narval
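
For either option, a quick way to confirm the server actually came up is to query its OpenAI-compatible endpoint. A minimal sketch, assuming the default port 8000 and the model path from the examples above:

```python
from openai import OpenAI


# vLLM serves an OpenAI-compatible API; any non-empty api_key is accepted by default.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
print([m.id for m in client.models.list().data])  # should include the served model path
```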

## Run Trainer

Refer to the top of [trainer.py](grpo/trainer.py) for example usage.

## Adapting to your workflow

Configurable options:

- GRPO hyperparameters
- dataset (the example uses `openai/gsm8k`; a reward sketch for its answer format follows this list)
- evaluation scheme and LLM judge setup
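
As a concrete reference for "verifiable reward", here is a minimal sketch of a rule-based reward for `openai/gsm8k`, whose reference answers end in `#### <answer>`; the function name and matching rule are illustrative, not the trainer's actual implementation:

```python
import re


def gsm8k_reward(completion: str, reference: str) -> float:
    """Return 1.0 if the completion's final number matches the gold answer, else 0.0."""
    gold = reference.split("####")[-1].strip().replace(",", "")
    # Treat the last number-like token in the completion as its final answer.
    numbers = re.findall(r"-?\d+(?:\.\d+)?", completion.replace(",", ""))
    if not numbers:
        return 0.0
    try:
        return float(float(numbers[-1]) == float(gold))
    except ValueError:
        return 0.0
```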

## Optional: Observability Integration

Set up LangFuse to track the output of your models as training proceeds.

```bash
export LANGFUSE_SECRET_KEY="sk-lf-..."
export LANGFUSE_PUBLIC_KEY="pk-lf-..."
export LANGFUSE_HOST="https://us.cloud.langfuse.com"
```
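
With the keys exported, a minimal sketch to confirm the credentials work, assuming the Langfuse Python SDK client reads them from the environment and exposes `auth_check()`:

```python
from langfuse import Langfuse


# Picks up LANGFUSE_SECRET_KEY / LANGFUSE_PUBLIC_KEY / LANGFUSE_HOST from the environment.
client = Langfuse()
print(client.auth_check())  # True when the keys and host are valid
```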
176 changes: 176 additions & 0 deletions starters/llm_fine_tuning/rlvr/agents_integration/examples.py
@@ -0,0 +1,176 @@
"""
OpenAI Agents SDK demo: function tool `get_weather` + minimal agent.

Setup (with astral-uv):
    uv venv && uv pip install openai-agents pydantic
    # Or, if you prefer pip: `pip install openai-agents pydantic`

Environment:
export OPENAI_API_KEY=... # Required for the default OpenAI client

Run:
    uv run python examples.py
"""

from __future__ import annotations

import hashlib
from datetime import date, datetime, timedelta, timezone
from typing import Literal

from agents import Agent, Runner, function_tool
from pydantic import BaseModel, Field


class WeatherReport(BaseModel):
"""Structured output for a weather report.

Attributes
----------
city:
Echo of the requested city (canonicalized).
unit:
"c" for Celsius or "f" for Fahrenheit.
temperature:
Air temperature in the requested unit.
feels_like:
Apparent temperature in the requested unit.
condition:
One of: "clear", "partly cloudy", "cloudy", "rain", "snow", "windy".
humidity:
Relative humidity percentage (0–100).
wind_kph:
Wind speed in kilometers per hour.
observation_time:
UTC timestamp when the reading was generated.
"""

city: str
unit: Literal["c", "f"]
temperature: float
feels_like: float
condition: Literal["clear", "partly cloudy", "cloudy", "rain", "snow", "windy"]
humidity: int = Field(ge=0, le=100)
wind_kph: float = Field(ge=0)
observation_time: datetime


@function_tool
def get_weather(
city: str,
unit: Literal["c", "f"] = "c",
when: Literal["now", "today", "tomorrow"] = "now",
) -> str:
"""Return a deterministic, mock weather report for demos.

The function is *offline* and *stable across runs* for a given `(city, date)`
so it's ideal for showcasing **function-tool** calls without network flakiness.

Args:
city:
Human-readable city name (e.g., "Vancouver").
unit:
Temperature unit: "c" for Celsius, "f" for Fahrenheit. Defaults to "c".
when:
Time window for the report: "now", "today", or "tomorrow". Defaults to "now".

Returns:
JSON string representing a `WeatherReport`.
"""
canonical = city.strip()

# City baselines (°C). Extend this mapping to taste.
baselines: dict[str, float] = {
"vancouver": 14.0,
"new york": 12.0,
"london": 11.0,
"singapore": 28.0,
"shanghai": 28.0,
"auckland": 20.0,
"tokyo": 17.0,
"paris": 13.0,
"san francisco": 16.0,
"berlin": 12.0,
"mexico city": 19.0,
}

key = canonical.lower()
base_c = baselines.get(key, 15.0)

# Reference date for deterministic seeding
today = date.today()
ref_date = today if when in ("now", "today") else today + timedelta(days=1)

    # Seeded pseudo-randomness derived from (city, date). hashlib keeps the seed
    # stable across interpreter runs; the builtin str hash() is randomized per process.
    seed = int.from_bytes(
        hashlib.sha256(f"{key}|{ref_date.isoformat()}".encode()).digest()[:8], "big"
    )

    def prand(a: float, b: float, salt: int) -> float:
        # Deterministic pseudo-random value in [a, b] (10 discrete steps).
        return a + ((seed ^ salt) % 10) / 9.0 * (b - a)

temp_c = base_c + prand(-4.0, 4.0, 0xA5A5) - 0.5
humidity = int(round(prand(40, 90, 0xB6B6)))
wind_kph = round(prand(0.0, 30.0, 0xC7C7), 1)

band = seed % 100
if band < 20:
condition = "clear"
elif band < 45:
condition = "partly cloudy"
elif band < 65:
condition = "cloudy"
elif band < 85:
condition = "rain"
elif band < 95:
condition = "windy"
else:
condition = "snow"

feels_c = temp_c - 0.1 * wind_kph + 0.02 * (humidity - 50)

def to_unit(tc: float, u: Literal["c", "f"]) -> float:
return round(tc if u == "c" else (tc * 9 / 5 + 32), 1)

report = WeatherReport(
city=canonical,
unit=unit,
temperature=to_unit(temp_c, unit),
feels_like=to_unit(feels_c, unit),
condition=condition, # type: ignore[arg-type]
humidity=humidity,
wind_kph=wind_kph,
observation_time=datetime.now(timezone.utc),
)

# Agents SDK tools should return a string (or something that stringifies cleanly).
return report.model_dump_json()


# --- Minimal agent wiring ----------------------------------------------------
weather_agent = Agent(
name="Weather Helper",
instructions=(
"You answer weather questions. When the user asks about weather, "
"call the `get_weather` tool. If it returns JSON, parse it and reply "
"concisely with temperature, feels-like, condition, and units."
),
tools=[get_weather], # register the function tool
)


def main() -> None:
"""Run a single demo turn with the agent and print the final output."""
# Example inputs that strongly encourage tool use
user_inputs: list[str] = [
"What's the weather in Vancouver today in celsius?",
"NYC now, in Fahrenheit — include feels-like and wind, please.",
]

for i, prompt in enumerate(user_inputs, start=1):
print(f"\n=== Demo turn {i} ===")
result = Runner.run_sync(weather_agent, prompt)
print(result.final_output)


if __name__ == "__main__":
main()