modelscope · chenyushuo · Nov 4, 2025 · Oct 31, 2025 · Nov 3, 2025 · Nov 3, 2025
diff --git a/README.md b/README.md
@@ -82,6 +82,7 @@ Trinity-RFT is a flexible, general-purpose framework for reinforcement fine-tuni
 
 ## 🚀 News
 
+* [2025-10] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.2)] Trinity-RFT v0.3.2 released: bug fixes and advanced task selection & scheduling.
 * [2025-10] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.1)] Trinity-RFT v0.3.1 released: multi-stage training support, improved agentic RL examples, LoRA support, debug mode and new RL algorithms.
 * [2025-09] [[Release Notes](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.0)] Trinity-RFT v0.3.0 released: enhanced Buffer, FSDP2 & Megatron support, multi-modal models, and new RL algorithms/examples.
 * [2025-08] Introducing [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
@@ -177,14 +178,14 @@ uv sync --extra dev --extra flash_attn
 If you just want to use the package without modifying the code:
 
 ```bash
-pip install trinity-rft==0.3.1
+pip install trinity-rft
 pip install flash-attn==2.8.1
 ```
 
 Or with `uv`:
 
 ```bash
-uv pip install trinity-rft==0.3.1
+uv pip install trinity-rft
 uv pip install flash-attn==2.8.1
 ```
 

diff --git a/README_zh.md b/README_zh.md
@@ -83,6 +83,7 @@ Trinity-RFT 是一个灵活、通用的大语言模型（LLM）强化微调（RF
 
 ## 🚀 新闻
 
+* [2025-10] [[发布说明](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.2)] Trinity-RFT v0.3.2 发布：修复若干 Bug 并支持进阶的任务选择和调度。
 * [2025-10] [[发布说明](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.1)] Trinity-RFT v0.3.1 发布：多阶段训练支持、改进的智能体 RL 示例、LoRA 支持、调试模式和全新 RL 算法。
 * [2025-09] [[发布说明](https://github.com/modelscope/Trinity-RFT/releases/tag/v0.3.0)] Trinity-RFT v0.3.0 发布：增强的 Buffer、FSDP2 & Megatron 支持，多模态模型，以及全新 RL 算法/示例。
 * [2025-08] 推出 [CHORD](https://github.com/modelscope/Trinity-RFT/tree/main/examples/mix_chord)：动态 SFT + RL 集成，实现进阶 LLM 微调（[论文](https://arxiv.org/pdf/2508.11408)）。
@@ -176,14 +177,14 @@ uv sync --extra dev --extra flash_attn
 如果您只需使用 Trinity-RFT 而不打算修改代码：
 
 ```bash
-pip install trinity-rft==0.3.1
+pip install trinity-rft
 pip install flash-attn==2.8.1
 ```
 
 或使用 `uv`：
 
 ```bash
-uv pip install trinity-rft==0.3.1
+uv pip install trinity-rft
 uv pip install flash-attn==2.8.1
 ```
 

diff --git a/docs/sphinx_doc/source/tutorial/trinity_installation.md b/docs/sphinx_doc/source/tutorial/trinity_installation.md
@@ -65,14 +65,14 @@ uv sync --extra dev --extra flash_attn
 If you just want to use the package without modifying the code:
 
 ```bash
-pip install trinity-rft==0.3.1
+pip install trinity-rft
 pip install flash-attn==2.8.1
 ```
 
 Or with `uv`:
 
 ```bash
-uv pip install trinity-rft==0.3.1
+uv pip install trinity-rft
 uv pip install flash-attn==2.8.1
 ```
 

diff --git a/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md b/docs/sphinx_doc/source_zh/tutorial/trinity_installation.md
@@ -65,14 +65,14 @@ uv sync --extra dev --extra flash_attn
 如果您只需使用 Trinity-RFT 而不打算修改代码：
 
 ```bash
-pip install trinity-rft==0.3.1
+pip install trinity-rft
 pip install flash-attn==2.8.1
 ```
 
 或使用 `uv`：
 
 ```bash
-uv pip install trinity-rft==0.3.1
+uv pip install trinity-rft
 uv pip install flash-attn==2.8.1
 ```
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name =  "trinity-rft"
-version = "0.4.0.dev0"
+version = "0.3.2"
 authors = [
     {name="Trinity-RFT Team", email="[email protected]"},
 ]
@@ -87,6 +87,10 @@ doc = [
     "sphinxcontrib-mermaid",
 ]
 
+mm = [
+    "qwen-vl-utils",
+]
+
 flash_attn = [
     "flash-attn==2.8.1"
 ]

diff --git a/scripts/docker/Dockerfile b/scripts/docker/Dockerfile
@@ -26,7 +26,7 @@ RUN apt update && apt install -y \
 # copy the Trinity-RFT dir into the workspace
 COPY . .
 
-RUN pip install --upgrade pip && pip install -e .[dev] && pip install flash_attn==2.8.1 --no-build-isolation
+RUN pip install --upgrade pip && pip install -e .[mm,dev] && pip install flash_attn==2.8.1 --no-build-isolation
 
 # Set Env variables
 

diff --git a/scripts/docker_for_megatron/Dockerfile b/scripts/docker_for_megatron/Dockerfile
@@ -27,7 +27,7 @@ COPY . .
 
 # Install Trinity-RFT with Megatron
 RUN pip install --upgrade pip \
-    && pip install -e .[dev] \
+    && pip install -e .[mm,dev] \
     && pip install flash_attn==2.8.1 --no-build-isolation \
     && pip install -e .[megatron] \
     && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation \

diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py
@@ -261,11 +261,13 @@ async def test_model_len(self):
 
         response = openai_client.chat.completions.create(model=model_id, messages=messages[1:], n=1)
         self.assertEqual(len(response.choices), 1)
-        print(response.choices[0].message.content)
         exps = self.model_wrapper.extract_experience_from_history()
         self.assertEqual(len(exps), 1)
-        # only generate max_model_len - prompt_len tokens
-        self.assertEqual(len(exps[0].tokens), self.max_model_len)
+        # generation is capped at max_response_tokens, so tokens = prompt + response
+        self.assertEqual(
+            len(exps[0].tokens),
+            response.usage.prompt_tokens + self.config.model.max_response_tokens,
+        )
 
 
 class TestAPIServer(RayUnittestBaseAysnc):

diff --git a/trinity/__init__.py b/trinity/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 """Trinity-RFT (Reinforcement Fine-Tuning)"""
 
-__version__ = "0.4.0.dev0"
+__version__ = "0.3.2"
diff --git a/trinity/common/models/vllm_model.py b/trinity/common/models/vllm_model.py
@@ -51,6 +51,8 @@ def __init__(
             os.environ["VLLM_RAY_PER_WORKER_GPUS"] = str(int(config.use_v1))
             os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"
             os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0"
+        if get_vllm_version() >= parse_version("0.11.0"):
+            os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0"
         self.default_sampling_params = vllm.SamplingParams(
             n=1,
             temperature=0.0,