4 changes: 4 additions & 0 deletions trl/trainer/grpo_config.py
@@ -286,6 +286,10 @@ class GRPOConfig(TrainingArguments):
"it prevents the model from generating different logprobs for the same input."
},
)
trust_remote_code: bool = field(
default=False,
metadata={"help": "Whether to trust remote code when loading custom models e.g. from the Hugging Face Hub."},
Member:
Suggested change:
-        metadata={"help": "Whether to trust remote code when loading custom models e.g. from the Hugging Face Hub."},
+        metadata={"help": "Whether to trust remote code when loading custom models from the Hugging Face Hub."},

Contributor Author:
trust_remote_code matters when loading from local files too. The vLLM docs describe it similarly:

    Trust remote code (e.g., from HuggingFace) when downloading the model and tokenizer.

https://docs.vllm.ai/en/latest/api/vllm/index.html#vllm.LLM

That said, the transformers docs only mention the Hub. Do you think the "e.g." should be deleted? I don't have a strong opinion, so I'll follow your preference.

    Whether or not to allow for custom models defined on the Hub in their own modeling files. This option should only be set to True for repositories you trust and in which you have read the code, as it will execute code present on the Hub on your local machine.

https://huggingface.co/docs/transformers/v4.56.2/en/model_doc/auto#transformers.AutoModel.from_pretrained.trust_remote_code

+    )
 
     # Parameters that control the data preprocessing
     # The default value remove_unused_columns is overwritten from the parent class, because in GRPO we usually rely on
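A minimal usage sketch of the new option, assuming this PR as written: `my-org/custom-arch` is a hypothetical Hub repo whose architecture is defined in its own modeling files, and the length-based reward is just a toy reward in the style of the TRL docs. The flag is forwarded by the trainer changes below to `AutoConfig`/`AutoModelForCausalLM`/`AutoProcessor` and, when enabled, to vLLM.

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/tldr", split="train")

def reward_len(completions, **kwargs):
    # Toy reward: prefer completions close to 20 characters.
    return [-abs(20 - len(completion)) for completion in completions]

# trust_remote_code is the field added by this PR.
training_args = GRPOConfig(output_dir="custom-arch-grpo", trust_remote_code=True)
trainer = GRPOTrainer(
    model="my-org/custom-arch",  # hypothetical custom-code model
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```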
26 changes: 19 additions & 7 deletions trl/trainer/grpo_trainer.py
@@ -34,6 +34,7 @@
 from torch.utils.data import DataLoader, Sampler
 from transformers import (
     AutoConfig,
+    AutoModelForCausalLM,
     AutoModelForSequenceClassification,
     AutoProcessor,
     AutoTokenizer,
@@ -251,9 +252,13 @@ def __init__(
f"a `torch.dtype` (e.g., 'float32'), but got {dtype}."
)
# Disable caching if gradient checkpointing is enabled (not supported)
config = AutoConfig.from_pretrained(model_id)
architecture = getattr(transformers, config.architectures[0])
model = architecture.from_pretrained(model_id, **model_init_kwargs)
config = AutoConfig.from_pretrained(model_id, trust_remote_code=args.trust_remote_code)
if architecture := getattr(transformers, config.architectures[0], None):
model = architecture.from_pretrained(model_id, **model_init_kwargs)
else:
model = AutoModelForCausalLM.from_pretrained(
model_id, trust_remote_code=args.trust_remote_code, **model_init_kwargs
)
else:
model_id = model.config._name_or_path
if args.model_init_kwargs is not None:
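Restating the fallback outside the diff, as a rough sketch: architectures that ship with transformers are resolved by name, while custom architectures (where `getattr` returns `None`) fall back to `AutoModelForCausalLM` with `trust_remote_code`. The repo name here is hypothetical.

```python
import transformers
from transformers import AutoConfig, AutoModelForCausalLM

model_id = "my-org/custom-arch"  # hypothetical repo with its own modeling code
trust_remote_code = True

config = AutoConfig.from_pretrained(model_id, trust_remote_code=trust_remote_code)
if architecture := getattr(transformers, config.architectures[0], None):
    # The architecture class ships with transformers (e.g. "LlamaForCausalLM").
    model = architecture.from_pretrained(model_id)
else:
    # Custom architecture: only loadable by executing the repo's own modeling files.
    model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=trust_remote_code)
```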
@@ -275,7 +280,9 @@

         # Processing class
         if processing_class is None:
-            processing_class = AutoProcessor.from_pretrained(model.config._name_or_path)
+            processing_class = AutoProcessor.from_pretrained(
+                model.config._name_or_path, trust_remote_code=args.trust_remote_code
+            )
 
         # Handle pad token for processors or tokenizers
         if isinstance(processing_class, ProcessorMixin):
@@ -439,9 +446,13 @@ def __init__(
             self.ref_model = None
         else:
             # For deepspeed, fsdp or non-distributed models, create a reference model from scratch
-            config = AutoConfig.from_pretrained(model_id)
-            architecture = getattr(transformers, config.architectures[0])
-            self.ref_model = architecture.from_pretrained(model_id, **model_init_kwargs)
+            config = AutoConfig.from_pretrained(model_id, trust_remote_code=args.trust_remote_code)
+            if architecture := getattr(transformers, config.architectures[0], None):
+                self.ref_model = architecture.from_pretrained(model_id, **model_init_kwargs)
+            else:
+                self.ref_model = AutoModelForCausalLM.from_pretrained(
+                    model_id, trust_remote_code=args.trust_remote_code, **model_init_kwargs
+                )

         # Disable dropout in the models
         if args.disable_dropout:
@@ -549,6 +560,7 @@
                     max_num_batched_tokens=4096,
                     model_impl=self.args.vllm_model_impl,
                     enable_sleep_mode=self.args.vllm_enable_sleep_mode,
+                    trust_remote_code=self.args.trust_remote_code,
                     # Important so temperature scaling/logit tweaking affects the TIS log probs
                     logprobs_mode="processed_logprobs",
                 )
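On the vLLM side, the trainer's in-process engine now receives the same flag; roughly equivalent to constructing the engine as below (model name hypothetical):

```python
from vllm import LLM

llm = LLM(model="my-org/custom-arch", trust_remote_code=True)
```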