Skip to content

Commit

Permalink
Use "max-autotune-no-cudagraphs" torch compile mode in 10B configs
Browse files Browse the repository at this point in the history
Use "max-autotune-no-cudagraphs" torch compile mode in 10B configs
  • Loading branch information
samsja committed Dec 11, 2024
1 parent d814fff commit f3e0fde
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 0 deletions.
1 change: 1 addition & 0 deletions configs/10B/H100.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ wandb_resume = false
[train]
micro_bs = 1
ac_ckpt = true
torch_compile_mode = "max-autotune-no-cudagraphs"

[optim]
sched_type = "wsd-sqrt"
Expand Down
1 change: 1 addition & 0 deletions configs/10B/H100_cooldown.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ wandb_resume = false
[train]
micro_bs = 1
ac_ckpt = true
torch_compile_mode = "max-autotune-no-cudagraphs"

[optim]
sched_type = "wsd-sqrt"
Expand Down
1 change: 1 addition & 0 deletions configs/10B/H100_simple.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ project = "debug_10B_zero_band"
[train]
micro_bs = 1
ac_ckpt = true
torch_compile_mode = "max-autotune-no-cudagraphs"

[optim]
sched_type = "wsd-sqrt"
Expand Down
1 change: 1 addition & 0 deletions configs/150M_short/H100.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type_model = "llama2"
[train]
micro_bs = 64 # change this base on the gpu
reshard_after_forward = true
torch_compile_mode = "max-autotune-no-cudagraphs"

[optim]
batch_size = 512
Expand Down

0 comments on commit f3e0fde

Please sign in to comment.