Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
78 commits
Select commit Hold shift + click to select a range
75183ed
update
yyDing1 Sep 23, 2025
ffb4ba3
Merge branch 'volcengine:main' into rm
yyDing1 Sep 25, 2025
99adc05
update reward config
yyDing1 Sep 25, 2025
38af059
change replica
yyDing1 Sep 25, 2025
6d664a5
restore test file
yyDing1 Sep 25, 2025
f32cf86
Merge branch 'volcengine:main' into rm
yyDing1 Sep 25, 2025
8965917
update
yyDing1 Sep 25, 2025
3e8224e
update
yyDing1 Sep 25, 2025
86d739d
fix ci
yyDing1 Sep 25, 2025
ade706e
update
yyDing1 Sep 26, 2025
ba613d9
update
yyDing1 Sep 26, 2025
69d1f5f
Merge branch 'volcengine:main' into fapo
yyDing1 Sep 26, 2025
2b7222f
update
yyDing1 Sep 26, 2025
cb58490
update
yyDing1 Sep 27, 2025
abb0b64
update
yyDing1 Sep 27, 2025
8910f1c
Merge branch 'volcengine:main' into fapo
yyDing1 Sep 27, 2025
77a4368
update
yyDing1 Sep 27, 2025
5833c54
update
yyDing1 Sep 28, 2025
4ed7416
update
yyDing1 Sep 28, 2025
8827de3
update
yyDing1 Sep 29, 2025
984658c
update
yyDing1 Sep 30, 2025
1296c50
update
yyDing1 Sep 30, 2025
b9c7d3c
update
yyDing1 Sep 30, 2025
c3f6013
update
yyDing1 Sep 30, 2025
ed2d771
update
yyDing1 Sep 30, 2025
fbd3ce2
Merge branch 'volcengine:main' into fapo
yyDing1 Oct 1, 2025
29e23fd
update
yyDing1 Oct 1, 2025
e7b41f2
update
yyDing1 Oct 1, 2025
c45a916
update
yyDing1 Oct 1, 2025
c41bfd6
fix
yyDing1 Oct 2, 2025
37f5454
fix
yyDing1 Oct 2, 2025
c76cbbe
fix
yyDing1 Oct 2, 2025
ce993f7
update
yyDing1 Oct 2, 2025
7804219
update
yyDing1 Oct 2, 2025
4d898c2
update
yyDing1 Oct 2, 2025
7fbba99
update
yyDing1 Oct 2, 2025
cdddde9
update
yyDing1 Oct 2, 2025
694b001
update
yyDing1 Oct 2, 2025
62c82a0
update
yyDing1 Oct 3, 2025
0e74a62
update
yyDing1 Oct 3, 2025
9de31ec
update
yyDing1 Oct 4, 2025
beadc1a
update
yyDing1 Oct 4, 2025
39b7908
update
yyDing1 Oct 4, 2025
c6b45ab
update
yyDing1 Oct 4, 2025
4ae2bcf
fix gemini
yyDing1 Oct 4, 2025
5e56928
update
yyDing1 Oct 5, 2025
bd1ced3
update
yyDing1 Oct 5, 2025
c00df86
fix ci
yyDing1 Oct 5, 2025
82e8e48
fix ci
yyDing1 Oct 6, 2025
5329446
fix
yyDing1 Oct 6, 2025
cda5219
fix ci
yyDing1 Oct 6, 2025
e46ed5f
fix ci
yyDing1 Oct 6, 2025
cee72ad
fix
yyDing1 Oct 6, 2025
9d6976c
fix
yyDing1 Oct 9, 2025
7c8ddfb
fix
yyDing1 Oct 9, 2025
d32a86b
fix
yyDing1 Oct 9, 2025
f4b5811
Merge branch 'volcengine:main' into fapo
yyDing1 Oct 11, 2025
eebbc26
Merge branch 'volcengine:main' into fapo
yyDing1 Oct 14, 2025
b1e6ccd
Merge branch 'volcengine:main' into fapo
yyDing1 Oct 16, 2025
46460ce
update
yyDing1 Oct 16, 2025
49068d6
add naive router
yyDing1 Oct 16, 2025
e372031
fix
yyDing1 Oct 16, 2025
89bc00e
update
yyDing1 Oct 16, 2025
68bef06
update
yyDing1 Oct 16, 2025
2ef04a7
update
yyDing1 Oct 17, 2025
0b76e6f
fix ci
yyDing1 Oct 17, 2025
bdd01e1
Merge branch 'main' into fapo
yyDing1 Oct 19, 2025
1212938
update
yyDing1 Oct 19, 2025
b088993
update
yyDing1 Oct 19, 2025
4d94256
fix ci
yyDing1 Oct 19, 2025
b878fd0
fix
yyDing1 Oct 19, 2025
5b99c44
fix
yyDing1 Oct 19, 2025
303805a
Merge branch 'volcengine:main' into fapo
yyDing1 Oct 20, 2025
dcdb883
add vllm support and ci test for agentloop with reward manager
yyDing1 Oct 20, 2025
e09c077
pre-commit reformat
yyDing1 Oct 20, 2025
cbe12c5
fix ci
yyDing1 Oct 20, 2025
49a0d7e
fix ci
yyDing1 Oct 20, 2025
b7f4ad7
update
yyDing1 Oct 21, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
# - new tests are added to workflow mentioned in 2.
# name: Check PR Title

name: reward_model
name: reward_model_sglang

on:
# Trigger the workflow on push or pull request,
Expand All @@ -46,24 +46,21 @@ on:
paths:
- "verl/**/*.py"
# Entrypoints
- ".github/workflows/reward_model.yml"
- "tests/workers/reward_model/**"

# Declare permissions just read content.
permissions:
contents: read
- ".github/workflows/reward_model_sglang.yml"
- "tests/experimental/reward/**"

# Cancel jobs on the same ref if a new one is triggered
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Declare permissions just read content.
permissions:
contents: read

env:
IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
TRANSFORMERS_VERSION: "4.56.2"


jobs:
setup:
Expand All @@ -81,10 +78,10 @@ jobs:
faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
mlp-image: "${{ env.IMAGE }}"

reward_model:
reward_model_sglang:
needs: setup
runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
timeout-minutes: 20 # Increase this timeout value as needed
timeout-minutes: 30 # Increase this timeout value as needed
env:
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
Expand All @@ -101,25 +98,26 @@ jobs:
- name: Install the current repository
run: |
pip3 install -e .[test]
# - name: Download model config files
# run: |
# hf download Skywork/Skywork-Reward-V2-Llama-3.2-1B --local-dir $HOME/models/Skywork/Skywork-Reward-V2-Llama-3.2-1B
# hf download verl-team/GenRM-CI-Test-1.5B --local-dir $HOME/models/verl-team/GenRM-CI-Test-1.5B
- name: Running discriminative reward model tests on 8 L20 GPUs
pip3 install sglang-router
- name: Prepare gsm8k dataset
run: |
ray stop --force
python3 examples/data_preprocess/gsm8k.py --local_dir ${HOME}/data/gsm8k
- name: Running sglang reward model tests on 8 L20 GPUs
run: |
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
pytest -s -x tests/workers/reward_model/test_discriminative_reward_model.py
- name: Running generative reward model tests on 8 L20 GPUs
ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward/test_reward_model.py
- name: Running sglang agent loop with reward manager tests on 8 L20 GPUs
run: |
unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
pytest -s -x tests/workers/reward_model/test_generative_reward_model.py
ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward/test_agent_loop_reward_manager.py

cleanup:
runs-on: ubuntu-latest
needs:
[
setup,
reward_model
reward_model_sglang
]
if: always()
steps:
Expand Down
128 changes: 128 additions & 0 deletions .github/workflows/reward_model_vllm.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
# # Tests layout

# Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
# - `tests/trainer` for testing functionality related to `verl/trainer`
# - `tests/models` for testing functionality related to `verl/models`
# - ...

# There are a few folders with `special_` prefix, created for special purposes:
# - `special_distributed`: unit tests that must run with multiple GPUs
# - `special_e2e`: end-to-end tests with training/generation scripts
# - `special_npu`: tests for NPUs
# - `special_sanity`: a suite of quick sanity tests
# - `special_standalone`: a set of tests that are designed to run in dedicated environments

# Accelerators for tests
# - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
# - Test scripts whose names end with `on_cpu.py` are run on CPU resources in a Linux environment.

# # Workflow layout

# All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
# 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
# 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
# 3. End-to-end tests: `e2e_*.yml`
# 4. Unit tests
# - `cpu_unit_tests.yml` runs pytest on all scripts matching the file name pattern `tests/**/test_*_on_cpu.py`
# - `gpu_unit_tests.yml` runs pytest on all test scripts whose file names do not have the `on_cpu.py` suffix.
# - Since the cpu/gpu unit tests by default run all tests under `tests`, please make sure tests are manually excluded from them when
# - new workflow yaml is added to `.github/workflows`
# - new tests are added to workflow mentioned in 2.
# name: Check PR Title

name: reward_model_vllm

on:
  # Run on pushes to `main` and `v0.*` branches, and on pull requests that
  # target those branches and touch one of the paths listed below.
  push:
    branches:
      - main
      - "v0.*"
  pull_request:
    branches:
      - main
      - "v0.*"
    paths:
      - "verl/**/*.py"
      # Entrypoints
      - ".github/workflows/reward_model_vllm.yml"
      - "tests/experimental/reward/**"

# Cancel jobs on the same ref if a new one is triggered
# (runs on refs/heads/main are never cancelled).
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# Grant the workflow token read-only access to repository contents.
permissions:
  contents: read

env:
  # Container image passed to the dynamic runner as `mlp-image`.
  IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
  # Endpoint passed to the runner action as `faas-url` for create/destroy.
  DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"

jobs:
  # Creates the dynamic runner and exposes its label and task id as outputs.
  setup:
    # Only runs in repositories owned by `volcengine`.
    if: github.repository_owner == 'volcengine'
    runs-on: ubuntu-latest
    outputs:
      runner-label: ${{ steps.create-runner.outputs.runner-label }}
      mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
    steps:
      # Pinned to the same commit as the checkout in the test job below, so
      # both jobs run identical, immutable action code.
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      - id: create-runner
        uses: volcengine/vemlp-github-runner@v1
        with:
          mode: "create"
          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
          mlp-image: "${{ env.IMAGE }}"

  # Runs the reward-model test suites with ROLLOUT_NAME=vllm.
  reward_model_vllm:
    needs: setup
    # Use the runner created by `setup`; fall back to the `L20x8` label when
    # that output is empty.
    runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
    timeout-minutes: 30 # Increase this timeout value as needed
    env:
      HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
      HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
      NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
      HF_ENDPOINT: "https://hf-mirror.com"
      HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
      # NOTE(review): sglang-named variable in the vLLM workflow; presumably
      # not needed here -- confirm before removing.
      SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
      NCCL_SHM_DISABLE: "1"
      NCCL_P2P_DISABLE: "1"
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0
      - name: Install the current repository
        run: |
          pip3 install -e .[test]
      - name: Prepare gsm8k dataset
        run: |
          ray stop --force
          python3 examples/data_preprocess/gsm8k.py --local_dir ${HOME}/data/gsm8k
      - name: Running vllm reward model tests on 8 L20 GPUs
        run: |
          # Clear the job-level proxy settings before running the tests.
          unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward/test_reward_model.py
      - name: Running vllm agent loop with reward manager tests on 8 L20 GPUs
        run: |
          # Clear the job-level proxy settings before running the tests.
          unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward/test_agent_loop_reward_manager.py

  # Destroys the dynamic runner created by `setup`.
  cleanup:
    runs-on: ubuntu-latest
    needs:
      - setup
      - reward_model_vllm
    # Run even when the jobs above failed or were cancelled, so the runner
    # is always torn down.
    if: always()
    steps:
      - id: destroy-runner
        uses: volcengine/vemlp-github-runner@v1
        with:
          mode: "destroy"
          faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
          mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
15 changes: 15 additions & 0 deletions docs/advance/reward_loop.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
Reward Loop
===========

Last updated: 10/10/2025.

.. warning::
Reward Loop is still in progress.

Reward Loop is designed for more flexible and easy-to-use reward computation.

**Design goal**:

- Support more efficient reward computation through asynchronous design
- Provide a more flexible reward model interface for user-customized reward functions
- Provide request-level load balancing across multiple reward servers
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ verl is fast with:
advance/rollout_is.md
advance/one_step_off
advance/agent_loop
advance/reward_loop
advance/fully_async

.. toctree::
Expand Down
49 changes: 49 additions & 0 deletions recipe/fapo/config/rm_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Hydra config for the FAPO recipe's reward model.
# NOTE(review): indentation was lost in this view, so the nesting of keys
# (in particular the trailing prompt_length / response_length) cannot be
# confirmed from here -- check against the file in the repository.

# Add the trainer config directory to Hydra's config search path.
hydra:
searchpath:
- file://verl/trainer/config

# Defaults list: `ppo_trainer` is listed before `_self_`, so per Hydra's
# defaults-list ordering the values in this file override it.
defaults:
- ppo_trainer
- _self_

reward_model:
# Dotted path of the config class for this section.
_target_: verl.workers.config.RewardModelConfig

reward_manager: dapo
enable: False

# Whether to deploy the model to a separate resource pool.
enable_resource_pool: False
n_gpus_per_node: 0
nnodes: 0

model:
type: discriminative
path: ~/models/FsfairX-LLaMA3-RM-v0.1
# OmegaConf interpolation: reuses the value of actor_rollout_ref.model.external_lib.
external_lib: ${actor_rollout_ref.model.external_lib}
trust_remote_code: False

rollout:
# Dotted path of the config class for this section.
_target_: verl.workers.config.RolloutConfig
# `???` is OmegaConf's mandatory-value marker: no default is given here, so
# the value must be supplied elsewhere (e.g. by a command-line override).
name: ???
dtype: bfloat16
gpu_memory_utilization: 0.5
enforce_eager: true
cudagraph_capture_sizes: null
free_cache_engine: true
data_parallel_size: 1
expert_parallel_size: 1
tensor_model_parallel_size: 2
max_num_batched_tokens: 8192
max_model_len: null
max_num_seqs: 1024
load_format: auto
engine_kwargs: {}
limit_images: null
enable_chunked_prefill: true
enable_prefix_caching: true
disable_log_stats: true
skip_tokenizer_init: true

# NOTE(review): presumably token-length limits belonging to `rollout` -- confirm nesting.
prompt_length: 512
response_length: 512
Loading
Loading