From f3e0fde6c06e904ddc330ee339d09f5d1e3a3475 Mon Sep 17 00:00:00 2001
From: Sami Jaghouar
Date: Wed, 11 Dec 2024 22:47:26 +0000
Subject: [PATCH] use max autotune no cuda graphs in 10b

use max autotune no cuda graphs in 10b
---
 configs/10B/H100.toml          | 1 +
 configs/10B/H100_cooldown.toml | 1 +
 configs/10B/H100_simple.toml   | 1 +
 configs/150M_short/H100.toml   | 1 +
 4 files changed, 4 insertions(+)

diff --git a/configs/10B/H100.toml b/configs/10B/H100.toml
index d58e609..a36a0ca 100644
--- a/configs/10B/H100.toml
+++ b/configs/10B/H100.toml
@@ -5,6 +5,7 @@ wandb_resume = false
 [train]
 micro_bs = 1
 ac_ckpt = true
+torch_compile_mode = "max-autotune-no-cudagraphs"
 
 [optim]
 sched_type = "wsd-sqrt"
diff --git a/configs/10B/H100_cooldown.toml b/configs/10B/H100_cooldown.toml
index 9132b1e..be35241 100644
--- a/configs/10B/H100_cooldown.toml
+++ b/configs/10B/H100_cooldown.toml
@@ -5,6 +5,7 @@ wandb_resume = false
 [train]
 micro_bs = 1
 ac_ckpt = true
+torch_compile_mode = "max-autotune-no-cudagraphs"
 
 [optim]
 sched_type = "wsd-sqrt"
diff --git a/configs/10B/H100_simple.toml b/configs/10B/H100_simple.toml
index dafa64b..2f8dfd5 100644
--- a/configs/10B/H100_simple.toml
+++ b/configs/10B/H100_simple.toml
@@ -4,6 +4,7 @@ project = "debug_10B_zero_band"
 [train]
 micro_bs = 1
 ac_ckpt = true
+torch_compile_mode = "max-autotune-no-cudagraphs"
 
 [optim]
 sched_type = "wsd-sqrt"
diff --git a/configs/150M_short/H100.toml b/configs/150M_short/H100.toml
index af7582e..9924389 100644
--- a/configs/150M_short/H100.toml
+++ b/configs/150M_short/H100.toml
@@ -5,6 +5,7 @@ type_model = "llama2"
 [train]
 micro_bs = 64 # change this base on the gpu
 reshard_after_forward = true
+torch_compile_mode = "max-autotune-no-cudagraphs"
 
 [optim]
 batch_size = 512