use log1p in orpo loss

huggingface/trl#1491
xin-li-67 · Mar 31, 2024 · 68aaa49 · 68aaa49
1 parent 099db6a
commit 68aaa49
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/src/llmtuner/train/orpo/trainer.py b/src/llmtuner/train/orpo/trainer.py
@@ -84,7 +84,7 @@ def odds_ratio_loss(
 
         # Derived from Eqs. (4) and (7) of https://arxiv.org/abs/2403.07691 by using log identities and exp(log(P(y|x))) = P(y|x)
         log_odds = (chosen_logps - rejected_logps) - (
-            torch.log(1 - torch.exp(chosen_logps)) - torch.log(1 - torch.exp(rejected_logps))
+            torch.log1p(-torch.exp(chosen_logps)) - torch.log1p(-torch.exp(rejected_logps))
         )
         ratio = F.logsigmoid(log_odds)
         losses = self.beta * ratio