diff --git a/README.md b/README.md
index 6127c09241d..585c1bd37ad 100644
--- a/README.md
+++ b/README.md
@@ -32,8 +32,8 @@ Trinity-RFT provides functionalities for users with different backgrounds and ob
## 🚀 News
+* [2026-04] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.2) Trinity-RFT v0.5.2 released: support for the Qwen3.5 series, bug fixes, and optimizations.
* [2026-03] 🤖 Trinity-RFT powers the training of CoPaw-Flash, a small agent model tailored to localized scenarios. Feel free to try CoPaw-Flash on [CoPaw](https://github.com/agentscope-ai/CoPaw); models are also available on [ModelScope](https://www.modelscope.cn/organization/AgentScope) and [HuggingFace](https://huggingface.co/agentscope-ai) ([News](https://mp.weixin.qq.com/s/-BXNU_PMi6QJuwSB5BqTbQ)).
-* [2026-03] Trinity-RFT now supports Qwen3.5 series.
* [2026-02] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.1) Trinity-RFT v0.5.1 released: enhanced VLM support, logging improvements, and bug fixes.
* [2026-02] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.0) Trinity-RFT v0.5.0 released: colocate mode for single-GPU scenarios, trainer-driven weight synchronization, automatic parallelism-setting suggestions, and more.
* [2026-01] 🎉 Three papers accepted at ICLR 2026: [CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord), [BOTS](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/bots), and [Group-relative REINFORCE variants](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/rec_gsm8k). Try out these new algorithms in Trinity-RFT!
@@ -47,7 +47,7 @@ Trinity-RFT provides functionalities for users with different backgrounds and ob
- [2025-11] Introducing [BOTS](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/bots): online RL task selection for efficient LLM fine-tuning ([paper](https://arxiv.org/pdf/2510.26374)).
- [2025-09] [Our paper](https://arxiv.org/pdf/2509.24203) reveals a novel off-policy interpretation for group-relative REINFORCE and its variants like GRPO and AsymRE ([implementation](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/rec_gsm8k)).
- - [2026-03] [2025-08] Introducing [CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
+ - [2025-08] Introducing [CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord): dynamic SFT + RL integration for advanced LLM fine-tuning ([paper](https://arxiv.org/pdf/2508.11408)).
- [2025-11] Trinity-RFT v0.3.3 released: bug fixes.
- [2025-11] Trinity-RFT v0.3.2 released: bug fixes and advanced task selection & scheduling.
- [2025-10] Trinity-RFT v0.3.1 released: multi-stage training support, improved agentic RL examples, LoRA support, debug mode and new RL algorithms.
diff --git a/README_zh.md b/README_zh.md
index 23e76fca8bf..e7cd96a334c 100644
--- a/README_zh.md
+++ b/README_zh.md
@@ -41,8 +41,8 @@ Trinity-RFT provides functionality for users with different backgrounds and goals:
## 🚀 News
+* [2026-04] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.2) Trinity-RFT v0.5.2 released: support for the Qwen3.5 series, bug fixes, and multiple optimizations.
* [2026-03] 🤖 Trinity-RFT powers the training of CoPaw-Flash, a small agent model tailored to localized scenarios. Feel free to try CoPaw-Flash on [CoPaw](https://github.com/agentscope-ai/CoPaw); model downloads are available on [ModelScope](https://www.modelscope.cn/organization/AgentScope) and [HuggingFace](https://huggingface.co/agentscope-ai) ([News](https://mp.weixin.qq.com/s/-BXNU_PMi6QJuwSB5BqTbQ)).
-* [2026-03] Trinity-RFT now supports the Qwen3.5 series.
* [2026-02] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.1) Trinity-RFT v0.5.1 released: enhanced VLM support, improved logging, and bug fixes.
* [2026-02] [[Release Notes]](https://github.com/agentscope-ai/Trinity-RFT/releases/tag/v0.5.0) Trinity-RFT v0.5.0 released: colocate mode for single-GPU scenarios, trainer-driven weight synchronization, automatic parallelism-setting suggestions, and more.
* [2026-01] 🎉 Three papers accepted at ICLR 2026: [CHORD](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/mix_chord), [BOTS](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/bots), and [Group-relative REINFORCE variants](https://github.com/agentscope-ai/Trinity-RFT/tree/main/examples/rec_gsm8k). Try out these new algorithms in Trinity-RFT!
diff --git a/pyproject.toml b/pyproject.toml
index 92dc9d89d4a..e140fd1d6c3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "trinity-rft"
-version = "0.5.1"
+dynamic = ["version"]
authors = [
{name="Trinity-RFT Team", email="trinity-rft@outlook.com"},
]
@@ -125,6 +125,9 @@ exclude = ["tests*", "docs*", "scripts*"]
[tool.setuptools.package-data]
my_package = ["*.md", "*.rst"]
+[tool.setuptools.dynamic]
+version = {attr = "trinity.__version__"}
+
[tool.black]
line-length = 100
target-version = ["py310", "py311", "py312"]
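A note on the version hunks above: `dynamic = ["version"]` plus `[tool.setuptools.dynamic]` makes setuptools read the version from `trinity/__init__.py` at build time, removing the need to bump `pyproject.toml` and `trinity/__init__.py` in lockstep for each release (this release only bumps `trinity/__init__.py` to 0.5.2). A minimal sanity-check sketch, assuming `trinity-rft` is pip-installed in the current environment:

```python
# Minimal sanity check (assumes `trinity-rft` is installed, not just on
# sys.path): the wheel metadata version should now always match the package
# attribute that setuptools reads at build time.
from importlib.metadata import version

import trinity

assert version("trinity-rft") == trinity.__version__  # e.g. both "0.5.2"
```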
diff --git a/tests/common/vllm_test.py b/tests/common/vllm_test.py
index 81075cfb527..debfa56cd76 100644
--- a/tests/common/vllm_test.py
+++ b/tests/common/vllm_test.py
@@ -203,13 +203,24 @@ def _check_experience(exp):
self.assertEqual(len(encoded_prompt), exp.prompt_length)
self.assertLessEqual(exp.prompt_length, self.config.model.max_prompt_tokens)
# check response content and length
- encoded_response = self.tokenizer.encode(exp.response_text, add_special_tokens=False)
- self.assertEqual(len(encoded_response), len(exp.tokens) - exp.prompt_length)
- self.assertLessEqual(
- len(exp.tokens) - exp.prompt_length, self.config.model.max_response_tokens
- )
- # check full sequence
- self.assertLessEqual(len(exp.tokens), self.config.model.max_model_len)
+ if exp.truncate_status == "prompt_truncated":
+ self.assertEqual(
+ exp.response_text, "[This experience is masked out due to overlong prompt]"
+ )
+ self.assertEqual(exp.prompt_text, self.tokenizer.decode(exp.tokens[:-1]))
+ self.assertEqual(len(exp.tokens), self.config.model.max_prompt_tokens + 1)
+ self.assertEqual(exp.prompt_length, self.config.model.max_prompt_tokens)
+ self.assertTrue(torch.equal(exp.logprobs, torch.zeros(1, dtype=torch.float32)))
+ else:
+ encoded_response = self.tokenizer.encode(
+ exp.response_text, add_special_tokens=False
+ )
+ self.assertEqual(len(encoded_response), len(exp.tokens) - exp.prompt_length)
+ self.assertLessEqual(
+ len(exp.tokens) - exp.prompt_length, self.config.model.max_response_tokens
+ )
+ # check full sequence
+ self.assertLessEqual(len(exp.tokens), self.config.model.max_model_len)
# For vllm engine, max_prompt_tokens and max_response_tokens work
response = self.model_wrapper.chat(messages)
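For readers skimming the new branch above, a hedged sketch of the invariants it asserts (field names follow the diff; the token values below are made up): a prompt-truncated `Experience` stores the truncated prompt plus one extra real token that stands in as a one-token dummy response, with a single zeroed logprob, so the usual `len(tokens) - prompt_length == len(logprobs)` bookkeeping still holds.

```python
# Illustrative only: the shape of a prompt-truncated Experience.
import torch

max_prompt_tokens = 4
prompt_token_ids = [11, 22, 33, 44, 55, 66]  # longer than the limit

tokens = prompt_token_ids[: max_prompt_tokens + 1]  # truncated prompt + 1 stand-in token
prompt_length = max_prompt_tokens                   # the trailing token counts as the response
logprobs = torch.zeros(1, dtype=torch.float32)      # one placeholder logprob for that token

assert len(tokens) == max_prompt_tokens + 1
assert len(tokens) - prompt_length == len(logprobs) == 1
```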
@@ -241,6 +252,30 @@ def _check_experience(exp):
self.config.model.max_response_tokens,
)
+ # test prompt truncation branch in generate
+ if self.max_prompt_tokens == 5:
+ await prepare_engines(self.engines, self.auxiliary_engines)
+ await self.model_wrapper.prepare()
+
+ prompt = "This is a deliberately long prompt for truncation coverage."
+ prompt_token_ids = self.tokenizer(prompt, truncation=False, return_tensors="pt")[
+ "input_ids"
+ ][0].tolist()
+ self.assertGreater(len(prompt_token_ids), self.config.model.max_prompt_tokens)
+
+ responses = self.model_wrapper.generate([prompt], n=2)
+ self.assertEqual(len(responses), 2)
+
+ for response in responses:
+ self.assertEqual(response.truncate_status, "prompt_truncated")
+ _check_experience(response)
+
+ exps = self.model_wrapper.extract_experience_from_history()
+ self.assertEqual(len(exps), 2)
+ for exp in exps:
+ self.assertEqual(exp.truncate_status, "prompt_truncated")
+ _check_experience(exp)
+
class TestModelLenWithoutPromptTruncation(RayUnittestBaseAsync):
def setUp(self):
diff --git a/trinity/__init__.py b/trinity/__init__.py
index 1ce298d2b5b..c573fe74ebb 100644
--- a/trinity/__init__.py
+++ b/trinity/__init__.py
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-
"""Trinity-RFT (Reinforcement Fine-Tuning)"""
-__version__ = "0.5.1"
+__version__ = "0.5.2"
diff --git a/trinity/common/models/model.py b/trinity/common/models/model.py
index 3fc9f5b4fdf..8150893921d 100644
--- a/trinity/common/models/model.py
+++ b/trinity/common/models/model.py
@@ -142,20 +142,14 @@ def _handle_prompt_truncation(self, prompt: str, **kwargs) -> Tuple[Sequence, bo
self.logger.warning(f"Prompt was truncated to {self.config.max_prompt_tokens} tokens")
dummy_response = "[This experience is masked out due to overlong prompt]"
- dummy_response_tokens = self.tokenizer( # type: ignore
- dummy_response, truncation=False, return_tensors="pt"
- )["input_ids"][0].tolist()
- dummy_response_tokens = dummy_response_tokens[
- : min(len(dummy_response_tokens), self.config.max_response_tokens) # type: ignore
- ]
-
- token_ids = prompt_token_ids[: self.config.max_prompt_tokens] + dummy_response_tokens
+
+ token_ids = prompt_token_ids[: self.config.max_prompt_tokens + 1]
return [
Experience(
tokens=token_ids,
- logprobs=torch.zeros(len(dummy_response_tokens), dtype=torch.float32),
- prompt_length=len(prompt_token_ids),
- prompt_text=self.tokenizer.decode(prompt_token_ids),
+ logprobs=torch.zeros(1, dtype=torch.float32),
+ prompt_length=self.config.max_prompt_tokens, # Use truncated length
+ prompt_text=self.tokenizer.decode(token_ids[:-1]),
response_text=dummy_response,
truncate_status="prompt_truncated",
reward=0.0,
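Summarizing the `_handle_prompt_truncation` rewrite as a before/after sketch (identifiers from the diff; the token list is illustrative): previously the dummy response text was tokenized and appended while `prompt_length` kept the untruncated length, so the length bookkeeping was inconsistent; now the experience keeps `max_prompt_tokens + 1` real prompt tokens, reports the truncated `prompt_length`, and the single trailing token lines up with the single zeroed logprob.

```python
# Sketch of old vs. new truncation handling (illustrative values).
max_prompt_tokens = 8
prompt_token_ids = list(range(20))  # an overlong prompt

# New behavior: truncated prompt + one trailing token as the masked "response".
tokens = prompt_token_ids[: max_prompt_tokens + 1]
prompt_length = max_prompt_tokens
assert len(tokens) - prompt_length == 1  # matches torch.zeros(1) logprobs

# Old behavior (for contrast): prompt_length was len(prompt_token_ids) == 20,
# while tokens held only the truncated prompt plus the tokenized dummy text,
# so len(tokens) - prompt_length could even go negative.
```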