Align the model's maximum sequence length (max_seq_len) with the data sequence length (#26)

PrimeIntellect-ai · Oct 1, 2024 · 80d050a
1 parent ac78db1
commit 80d050a
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 1 deletion.
diff --git a/src/zeroband/models/llama/__init__.py b/src/zeroband/models/llama/__init__.py
@@ -61,7 +61,7 @@
 }
 
 
-def get_model(name_model: str, type_model: str, vocab_size: int) -> tuple[Transformer, ModelArgs]:
+def get_model(name_model: str, type_model: str, vocab_size: int, seq_length: int) -> tuple[Transformer, ModelArgs]:
     """get the transformer model"""
 
     if type_model == "llama2":
@@ -72,4 +72,5 @@ def get_model(name_model: str, type_model: str, vocab_size: int) -> tuple[Transf
         raise ValueError(f"Model type {type_model} not supported")
 
     config.vocab_size = vocab_size
+    config.max_seq_len = seq_length
     return Transformer(config), config
diff --git a/src/zeroband/train.py b/src/zeroband/train.py
@@ -116,6 +116,7 @@ def train(config: Config):
         vocab_size=tokenizer.vocab_size
         if config.name_model != "debugmodel" or not config.data.fake
         else TEST_VOCAB_SIZE,
+        seq_length=config.data.seq_length,
     )
 
     if config.train.log_model_hash: