Merged
Commits
60 commits
bebbd42
add function type check
pan-x-c Dec 26, 2025
ed5a85a
add agentscope tuner v1 interface
pan-x-c Dec 26, 2025
81d8d62
update tuner interface
pan-x-c Dec 26, 2025
f451bcc
add missing file
pan-x-c Dec 26, 2025
fea6b96
fix pre-commit
pan-x-c Dec 26, 2025
bebf343
finish agentscope tune v1 interface
pan-x-c Dec 29, 2025
59b8deb
fix readme
pan-x-c Dec 29, 2025
657f7ec
rename to tuner
pan-x-c Dec 29, 2025
a8c6198
fix example
pan-x-c Dec 29, 2025
435cac4
update readme
pan-x-c Dec 30, 2025
8a1e03b
fix readme
pan-x-c Dec 30, 2025
3326838
fix eval tasksets
pan-x-c Dec 30, 2025
1a4526b
use tuner model
pan-x-c Dec 30, 2025
ceca3b3
fix pre-commit
pan-x-c Dec 30, 2025
789de86
update readme
pan-x-c Dec 30, 2025
68417bc
fix model type
pan-x-c Dec 30, 2025
f22eef0
refactor structure
pan-x-c Jan 4, 2026
8321195
fix doc
pan-x-c Jan 4, 2026
ea0f7c5
update comments
pan-x-c Jan 5, 2026
65b404f
fix comments
pan-x-c Jan 5, 2026
a338b38
add function type check
pan-x-c Jan 5, 2026
e2396a1
add unittests
pan-x-c Jan 5, 2026
55e7e4f
fix pre-commit
pan-x-c Jan 5, 2026
f2e7322
fix missing eval workflow args
pan-x-c Jan 5, 2026
11231b9
fix workflow args
pan-x-c Jan 5, 2026
7837814
fix reponse signature
pan-x-c Jan 5, 2026
423f9dc
fix comments
pan-x-c Jan 5, 2026
d20b08d
auto setup cluster on dlc
pan-x-c Jan 6, 2026
baaf0a7
fix dlc setup
pan-x-c Jan 6, 2026
b36805c
add tuner tutorial
pan-x-c Jan 8, 2026
68bec87
add tuner in tutorial
pan-x-c Jan 8, 2026
a631d53
add chinese doc
pan-x-c Jan 8, 2026
cf426cd
fix docs
pan-x-c Jan 8, 2026
907924a
clean code
pan-x-c Jan 8, 2026
4e8c373
add reward curve
pan-x-c Jan 8, 2026
6d0963c
fix comments
pan-x-c Jan 8, 2026
f9ceb44
fix missing packages
pan-x-c Jan 9, 2026
ed40269
move doc from training to tuner
pan-x-c Jan 9, 2026
631ec36
add tips for metrics
pan-x-c Jan 9, 2026
db37537
add tips
pan-x-c Jan 9, 2026
58643e6
fix chinese doc
pan-x-c Jan 9, 2026
9ebc8ed
fix en doc
pan-x-c Jan 9, 2026
4251087
add links to samples
pan-x-c Jan 9, 2026
ec83c38
fix comments
pan-x-c Jan 9, 2026
64fb8b5
fix comments
pan-x-c Jan 9, 2026
1f0429f
fix algorithm doc
pan-x-c Jan 9, 2026
3163b94
fix comments
pan-x-c Jan 9, 2026
99fe36d
fix type doc
pan-x-c Jan 9, 2026
5970355
fix comments
pan-x-c Jan 9, 2026
b274cd8
fix eval workflow_args
pan-x-c Jan 12, 2026
f2e27ed
Merge branch 'main' into feature/tuner_enhance
pan-x-c Jan 12, 2026
03e7733
fix dependencies
pan-x-c Jan 13, 2026
ef1461a
fix comments
pan-x-c Jan 13, 2026
6ff7f85
rename modules
pan-x-c Jan 13, 2026
4b2d9e9
fix pre-commit
pan-x-c Jan 13, 2026
3652692
remove template yaml
pan-x-c Jan 13, 2026
1a5d036
fix conflict
pan-x-c Jan 13, 2026
a873ace
fix config
pan-x-c Jan 13, 2026
ad23a5d
fix pre-commit
pan-x-c Jan 13, 2026
0dc0f01
fix pre-commit
pan-x-c Jan 14, 2026
311 changes: 208 additions & 103 deletions examples/training/react_agent/README.md

Large diffs are not rendered by default.

30 changes: 9 additions & 21 deletions examples/training/react_agent/config.yaml
@@ -1,20 +1,20 @@
project: AgentScope-ReAct
name: GSM8K-Qwen3-8B
# Please refer to https://modelscope.github.io/Trinity-RFT/en/main/tutorial/trinity_configs.html for detailed explanation of each field.
project: AgentScope
name: GSM8K-Qwen3-0.6B
# directory to save checkpoints, default to ./checkpoints if TRINITY_CHECKPOINT_ROOT_DIR not set
checkpoint_root_dir: ${oc.env:TRINITY_CHECKPOINT_ROOT_DIR,./checkpoints}
algorithm:
algorithm_type: multi_step_grpo # a GRPO-based algorithm for multi-step reasoning
repeat_times: 8 # repeat each training sample 8 times
model:
# path to the pre-trained model, default to Qwen/Qwen3-8B if TRINITY_MODEL_PATH not set
# Note: The model should have ReAct capabilities, e.g., Qwen3 8B or above
# smaller models may not perform well on complex reasoning tasks
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-8B}
# path to the pre-trained model, default to Qwen/Qwen3-0.6B if TRINITY_MODEL_PATH not set
model_path: ${oc.env:TRINITY_MODEL_PATH,Qwen/Qwen3-0.6B}
# maximum tokens generated in response
max_response_tokens: 16384
# maximum token length for both input and output
# if you face OOM, try to reduce max_model_len and max_response_tokens
max_model_len: 24576
temperature: 1.0
cluster:
node_num: 1 # cluster with 1 node
gpu_per_node: 8 # each node has 8 GPUs
@@ -25,41 +25,29 @@ buffer:
explorer_input:
taskset: # define the taskset for rollout
name: gsm8k
storage_type: file
path: 'openai/gsm8k'
subset_name: 'main'
split: 'train'
format:
prompt_key: 'question'
response_key: 'answer'
rollout_args:
temperature: 1.0
explorer:
runner_per_model: 16 # each model has 16 runners for parallel rollout
max_timeout: 600 # max timeout for each rollout is 600 seconds
rollout_model:
engine_num: 4 # setup 4 vllm inference model instances
tensor_parallel_size: 1 # each model instance uses tensor parallel size of 1
enable_prefix_caching: false
enforce_eager: true
enable_openai_api: true # some parameters to provide openai-style API, don't change them
enable_history: true
enable_auto_tool_choice: true
# Qwen3 series tool_call_parser and reasoning_parser, if you use other models, please adjust accordingly
tool_call_parser: hermes
reasoning_parser: deepseek_r1
enable_thinking: true
dtype: bfloat16
seed: 42
synchronizer:
sync_style: dynamic_by_explorer
sync_method: 'nccl'
sync_interval: 2
sync_interval: 1
sync_timeout: 1800 # wait for 30 minutes
trainer:
save_interval: 100 # save checkpoint every 100 steps
use_dynamic_bsz: true
max_token_len_per_gpu: 24576 # if you face OOM, try to reduce this value
ulysses_sequence_parallel_size: 2 # use sequence parallelism to reduce memory usage
ulysses_sequence_parallel_size: 1 # use sequence parallelism to reduce memory usage
monitor:
monitor_type: tensorboard # here we use tensorboard, you can also use wandb or mlflow
monitor_type: tensorboard # here we use tensorboard, you can also use wandb, mlflow or swanlab
147 changes: 89 additions & 58 deletions examples/training/react_agent/main.py
@@ -1,74 +1,47 @@
# -*- coding: utf-8 -*-
"""Example of training a ReAct agent using RL with Trinity-RFT."""
import os
"""Example of training a ReAct agent on GSM8K with Trinity-RFT."""
from typing import Dict


from pydantic import BaseModel, Field
from trinity.common.rewards import MathBoxedRewardFn

from agentscope.tune import tune
from agentscope.model import TrinityChatModel
from agentscope.tuner import (
tune,
Dataset,
WorkflowOutput,
JudgeOutput,
TunerChatModel,
Algorithm,
)
from agentscope.agent import ReActAgent
from agentscope.formatter import OpenAIChatFormatter
from agentscope.message import Msg


class GSM8KResponseStructure(BaseModel):
"""Response structure for GSM8K tasks."""

result: str = Field(
description=(
"Your solution of the given math problem. "
"Put your final answer in boxed format, e.g., \\boxed{42}"
),
)


class GSM8KRewardFn(MathBoxedRewardFn):
"""Reward function for GSM8K tasks."""

def __call__(
self,
response: Dict,
truth: str,
format_score_coef: float = 0.1,
**kwargs: Dict,
) -> dict[str, float]:
"""Calculate the reward based on the response and truth."""
# parse GSM8K truth
if isinstance(truth, str) and "####" in truth:
truth = truth.split("####")[1].strip()
else:
truth = str(truth)
return super().__call__(
response=response["result"],
truth=truth,
with_think=False,
format_score_coef=format_score_coef,
**kwargs,
)


async def run_react_agent(task: Dict, model: TrinityChatModel) -> float:
async def run_react_agent(
task: Dict,
model: TunerChatModel,
auxiliary_models: Dict[str, TunerChatModel],
) -> WorkflowOutput:
"""A simple workflow function using the ReAct agent to solve tasks.

Args:
task (Dict): The task to be solved.
model (TrinityChatModel): The language model to use.
model (TunerChatModel): The language model to use.
auxiliary_models (Dict[str, TunerChatModel]):
A dictionary of additional chat models available for
LLM-as-a-Judge. Not used in this workflow.

Returns:
WorkflowOutput: The workflow output wrapping the agent's response message.
"""
assert (
len(auxiliary_models) == 0
), "No auxiliary models are used in this workflow."

sys_prompt = (
"You are an agent specialized in solving math problems with tools. "
"Please solve the math problem given to you. You can write and "
"execute Python code to perform calculation or verify your answer. "
"You should return your final answer within \\boxed{{}}."
)

response_structure = GSM8KResponseStructure
reward_fn = GSM8KRewardFn()
agent = ReActAgent(
name="react_agent",
sys_prompt=sys_prompt,
@@ -78,21 +51,79 @@ async def run_react_agent(task: Dict, model: TrinityChatModel) -> float:
)
response = await agent.reply(
msg=Msg("user", task["question"], role="user"),
structured_model=response_structure,
)
reward = reward_fn(
response=response.metadata,
truth=task["answer"],
return WorkflowOutput(
response=response,
)


async def gsm8k_judge(
task: Dict,
response: Msg,
auxiliary_models: Dict[str, TunerChatModel],
) -> JudgeOutput:
"""A simple judge function to calculate reward based on agent's response.

Args:
task (Dict): The task information for the corresponding workflow.
response (Msg): The response generated by the corresponding workflow.
auxiliary_models (Dict[str, TunerChatModel]):
A dictionary of additional chat models available for LLM-as-a-Judge
usage. The keys are model names, and the values are the
corresponding TunerChatModel instances.

Returns:
JudgeOutput: The reward value assigned by the judge function.
"""
from trinity.common.rewards.math_reward import MathBoxedRewardFn

assert (
len(auxiliary_models) == 0
), "No auxiliary models are used in this workflow."

reward_fn = MathBoxedRewardFn()
# parse truth from gsm8k raw text
truth = task["answer"]
if isinstance(truth, str) and "####" in truth:
truth = truth.split("####")[1].strip()
else:
truth = str(truth)
# parse answer from response message
result = response.get_text_content()
reward_dict = reward_fn(
response=result,
truth=truth,
)
return JudgeOutput(
reward=sum(reward_dict.values()),
metrics=reward_dict,
)
return sum(reward.values())


if __name__ == "__main__":
config_path = os.path.join(
os.path.dirname(__file__),
"config.yaml",
dataset = Dataset(
path="openai/gsm8k",
name="main",
split="train",
)
tuner_model = TunerChatModel(
model_path="Qwen/Qwen3-0.6B",
max_model_len=24576,
max_tokens=16384,
temperature=1.0,
inference_engine_num=4,
tensor_parallel_size=1,
)
algorithm = Algorithm(
algorithm_type="multi_step_grpo",
group_size=8,
learning_rate=1e-6,
batch_size=32,
)
tune(
workflow_func=run_react_agent,
config_path=config_path,
judge_func=gsm8k_judge,
train_dataset=dataset,
model=tuner_model,
algorithm=algorithm,
)
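
The new judge_func signature exposes auxiliary_models for LLM-as-a-Judge, although the GSM8K judge above asserts that it is empty. Below is a minimal, hypothetical sketch of that pattern: the "grader" key, the grading prompt, and the defensive parsing of the model response are assumptions for illustration, not part of this PR.

# Hypothetical LLM-as-a-Judge variant of the judge function (illustration only).
# Assumes an auxiliary model has been registered under the made-up name "grader"
# and that TunerChatModel accepts OpenAI-style message dicts, as the
# OpenAIChatModel-based TrinityChatModel in this PR does.
from typing import Dict

from agentscope.message import Msg
from agentscope.tuner import JudgeOutput, TunerChatModel


async def llm_as_a_judge(
    task: Dict,
    response: Msg,
    auxiliary_models: Dict[str, TunerChatModel],
) -> JudgeOutput:
    """Grade the agent's answer with an auxiliary chat model."""
    grader = auxiliary_models["grader"]  # hypothetical model name
    result = await grader(
        [
            {
                "role": "user",
                "content": (
                    f"Question: {task['question']}\n"
                    f"Reference answer: {task['answer']}\n"
                    f"Candidate answer: {response.get_text_content()}\n"
                    "Reply with a single digit: 1 if the candidate answer "
                    "is correct, otherwise 0."
                ),
            },
        ],
    )
    # Collect text blocks from the response defensively, since the exact
    # response structure is not shown in this diff.
    text = "".join(
        block.get("text", "")
        for block in getattr(result, "content", None) or []
        if isinstance(block, dict)
    )
    reward = 1.0 if text.strip().startswith("1") else 0.0
    return JudgeOutput(reward=reward, metrics={"llm_judge_score": reward})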
5 changes: 5 additions & 0 deletions src/agentscope/model/_trinity_model.py
@@ -4,6 +4,7 @@
Optional,
TYPE_CHECKING,
)
from typing_extensions import deprecated
from ._openai_model import OpenAIChatModel
from ..types import JSONSerializableObject

@@ -14,6 +15,10 @@
AsyncOpenAI = "openai.AsyncOpenAI"


@deprecated(
"TrinityChatModel is deprecated, please use "
"`agentscope.tuner.TunerChatModel` instead.",
)
class TrinityChatModel(OpenAIChatModel):
"""A model class for RL Training with Trinity-RFT."""

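
Since TrinityChatModel now carries a deprecation notice pointing at agentscope.tuner.TunerChatModel, existing callers are expected to switch constructors. A minimal migration sketch; the constructor arguments mirror the ones used in examples/training/react_agent/main.py above, and any other parameters are not guaranteed by this diff.

# Before (deprecated, emits a DeprecationWarning when instantiated):
# from agentscope.model import TrinityChatModel
# model = TrinityChatModel(...)

# After: construct the tuner-native model instead, reusing the arguments
# shown in examples/training/react_agent/main.py in this PR.
from agentscope.tuner import TunerChatModel

model = TunerChatModel(
    model_path="Qwen/Qwen3-0.6B",
    max_model_len=24576,
    max_tokens=16384,
    temperature=1.0,
)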
13 changes: 5 additions & 8 deletions src/agentscope/tune/__init__.py
@@ -1,10 +1,7 @@
# -*- coding: utf-8 -*-
"""The learning module of AgentScope, including RL and SFT."""
"""This module has been deprecated and renamed to 'agentscope.tuner'."""

from ._tune import tune
from ._workflow import WorkflowType

__all__ = [
"tune",
"WorkflowType",
]
raise ImportError(
"The 'agentscope.tune' module has been renamed to 'agentscope.tuner'. "
"Please update your imports: 'from agentscope.tuner import ...'",
)
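
Because the old module now raises ImportError at import time instead of re-exporting anything, downstream code only needs its import path updated, for example:

# Old import path, which now fails with the ImportError raised above:
# from agentscope.tune import tune

# New import path:
from agentscope.tuner import tune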
72 changes: 0 additions & 72 deletions src/agentscope/tune/_tune.py

This file was deleted.
