From 0b22218889c1867d81179f32780374de625e0fe3 Mon Sep 17 00:00:00 2001
From: Adam Louly
Date: Tue, 24 Oct 2023 01:27:15 -0700
Subject: [PATCH] fix max_pos_embeddings error (#1478)

* fix max_pos_embeddings error

* fix lint

---------

Co-authored-by: Adam Louly
---
 .../training/language-modeling/run_clm.py    | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/examples/onnxruntime/training/language-modeling/run_clm.py b/examples/onnxruntime/training/language-modeling/run_clm.py
index bd9694ae41..cfe72186bc 100644
--- a/examples/onnxruntime/training/language-modeling/run_clm.py
+++ b/examples/onnxruntime/training/language-modeling/run_clm.py
@@ -493,14 +493,20 @@ def tokenize_function(examples):
             remove_columns=column_names,
         )
 
+    if hasattr(config, "max_position_embeddings"):
+        max_pos_embeddings = config.max_position_embeddings
+    else:
+        # Define a default value if the attribute is missing in the config.
+        max_pos_embeddings = 1024
+
     if data_args.block_size is None:
         block_size = tokenizer.model_max_length
-        if block_size > config.max_position_embeddings:
+        if block_size > max_pos_embeddings:
             logger.warning(
                 f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
-                f"Using block_size={min(1024, config.max_position_embeddings)} instead. You can change that default value by passing --block_size xxx."
+                f"Using block_size={min(1024, max_pos_embeddings)} instead. You can change that default value by passing --block_size xxx."
             )
-            block_size = min(1024, config.max_position_embeddings)
+            block_size = min(1024, max_pos_embeddings)
     else:
         if data_args.block_size > tokenizer.model_max_length:
             logger.warning(