Skip to content

Commit

Permalink
add short 150m exp
Browse files Browse the repository at this point in the history
  • Loading branch information
samsja committed Sep 27, 2024
1 parent d9059e1 commit 8394e6e
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 0 deletions.
12 changes: 12 additions & 0 deletions configs/150M_short/3090.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Short 150M-model experiment config, 3090 variant (configs/150M_short/3090.toml).
name_model = "150M"
project = "debug_150m_zero_band"

[train]
# GPU-specific knob: presumably the per-device micro-batch size, limited by GPU memory — TODO confirm.
micro_bs = 16 # change this based on the GPU
# NOTE(review): the value matches the name of PyTorch FSDP's ShardingStrategy.SHARD_GRAD_OP;
# confirm that this is how the trainer interprets it.
sharding_strategy = "SHARD_GRAD_OP"

[optim]
# NOTE(review): presumably the global batch size, with batch_size / micro_bs driving
# gradient accumulation — verify against the training script.
batch_size = 512
# NOTE(review): presumably the learning-rate warmup length, i.e. 1000 of the 15_000 total steps — confirm.
warmup_steps = 1000
total_steps = 15_000
lr = 4e-4
12 changes: 12 additions & 0 deletions configs/150M_short/A40.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Short 150M-model experiment config, A40 variant (configs/150M_short/A40.toml).
name_model = "150M"
project = "debug_150m_zero_band"

[train]
# GPU-specific knob: presumably the per-device micro-batch size, limited by GPU memory — TODO confirm.
micro_bs = 32 # change this based on the GPU
# NOTE(review): the value matches the name of PyTorch FSDP's ShardingStrategy.SHARD_GRAD_OP;
# confirm that this is how the trainer interprets it.
sharding_strategy = "SHARD_GRAD_OP"

[optim]
# NOTE(review): presumably the global batch size, with batch_size / micro_bs driving
# gradient accumulation — verify against the training script.
batch_size = 512
# NOTE(review): presumably the learning-rate warmup length, i.e. 1000 of the 15_000 total steps — confirm.
warmup_steps = 1000
total_steps = 15_000
lr = 4e-4
12 changes: 12 additions & 0 deletions configs/150M_short/H100.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Short 150M-model experiment config, H100 variant (configs/150M_short/H100.toml).
name_model = "150M"
project = "debug_150m_zero_band"

[train]
# GPU-specific knob: presumably the per-device micro-batch size, limited by GPU memory — TODO confirm.
micro_bs = 64 # change this based on the GPU
# NOTE(review): the value matches the name of PyTorch FSDP's ShardingStrategy.SHARD_GRAD_OP;
# confirm that this is how the trainer interprets it.
sharding_strategy = "SHARD_GRAD_OP"

[optim]
# NOTE(review): presumably the global batch size, with batch_size / micro_bs driving
# gradient accumulation — verify against the training script.
batch_size = 512
# NOTE(review): presumably the learning-rate warmup length, i.e. 1000 of the 15_000 total steps — confirm.
warmup_steps = 1000
total_steps = 15_000
lr = 4e-4

0 comments on commit 8394e6e

Please sign in to comment.