@@ -239,7 +239,7 @@ def __init__(
239
239
f"a `torch.dtype` (e.g., 'float32'), but got {dtype}."
240
240
)
241
241
# Disable caching if gradient checkpointing is enabled (not supported)
242
- config = AutoConfig.from_pretrained(model_id)
242
+ config = AutoConfig.from_pretrained(model_id, trust_remote_code=self.args.trust_remote_code)
243
243
architecture = getattr(transformers, config.architectures[0])
244
244
model = architecture.from_pretrained(model_id, **model_init_kwargs)
245
245
else:
@@ -263,7 +263,9 @@ def __init__(
263
263
264
264
# Processing class
265
265
if processing_class is None:
266
- processing_class = AutoProcessor.from_pretrained(model.config._name_or_path)
266
+ processing_class = AutoProcessor.from_pretrained(
267
+ model.config._name_or_path, trust_remote_code=self.args.trust_remote_code
268
+ )
267
269
268
270
# Handle pad token for processors or tokenizers
269
271
if isinstance(processing_class, ProcessorMixin):
@@ -427,7 +429,7 @@ def __init__(
427
429
self.ref_model = None
428
430
else:
429
431
# For deepspeed, fsdp or non-distributed models, create a reference model from scratch
430
- config = AutoConfig.from_pretrained(model_id)
432
+ config = AutoConfig.from_pretrained(model_id, trust_remote_code=self.args.trust_remote_code)
431
433
architecture = getattr(transformers, config.architectures[0])
432
434
self.ref_model = architecture.from_pretrained(model_id, **model_init_kwargs)
433
435
@@ -537,6 +539,7 @@ def __init__(
537
539
max_num_batched_tokens=4096,
538
540
model_impl=self.args.vllm_model_impl,
539
541
enable_sleep_mode=self.args.vllm_enable_sleep_mode,
542
+ trust_remote_code=self.args.trust_remote_code,
540
543
)
541
544
if self.args.vllm_enable_sleep_mode:
542
545
self.llm.sleep(level=1)
0 commit comments