
Commit 6f5c19f

MegatronLM Client: Truncate to max_length and not max_length+1 in _loglikelihood_tokens

KlaudiaTH committed Nov 4, 2023
1 parent c3cef3e
Showing 1 changed file with 1 addition and 1 deletion.
lm_eval/models/megatronlm.py (1 addition, 1 deletion):

```diff
@@ -177,7 +177,7 @@ def _collate(x):
     ctxlens = []
     for cache_key, context_enc, continuation_enc in chunk:
         # max_length+1 because the API takes up to 2049 tokens, including the first context token
-        inp = (context_enc + continuation_enc)[-(self.max_length + 1) :]
+        inp = (context_enc + continuation_enc)[-self.max_length:]
         # TODO: the logic is much simpler if we just look at the length of continuation tokens
         ctxlen = len(context_enc) - max(
             0, len(context_enc) + len(continuation_enc) - (self.max_length + 1)
```
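The one-line change can be illustrated with a minimal standalone sketch. The function name `truncate_and_ctxlen` and the toy token values are hypothetical; `max_length` stands for the model's context window (e.g. 2048 for an API that accepts 2049 tokens including the first context token). Note that the `ctxlen` computation below still uses `max_length + 1`, exactly as in the source, which is the inconsistency the TODO comment in the diff points at:

```python
def truncate_and_ctxlen(context_enc, continuation_enc, max_length):
    # After this commit, the request is truncated to max_length tokens
    # (previously max_length + 1, which could exceed the API limit).
    inp = (context_enc + continuation_enc)[-max_length:]
    # ctxlen estimates how many tokens of the truncated input belong to
    # the context; it keeps the max_length + 1 offset from the original
    # code, which the TODO in the diff flags as worth simplifying.
    ctxlen = len(context_enc) - max(
        0, len(context_enc) + len(continuation_enc) - (max_length + 1)
    )
    return inp, ctxlen

# Toy example with a context window of 8 tokens:
ctx = list(range(6))          # 6 context tokens: [0, 1, 2, 3, 4, 5]
cont = [100, 101, 102, 103]   # 4 continuation tokens
inp, ctxlen = truncate_and_ctxlen(ctx, cont, max_length=8)
# inp keeps only the last 8 of the 10 tokens; the 2 dropped tokens
# come off the front, i.e. the context side.
```

Truncating from the left (`[-max_length:]`) is deliberate: the continuation tokens are the ones being scored, so any tokens that must be dropped should come from the start of the context.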
