Skip to content

Commit

Permalink
update short configs
Browse files Browse the repository at this point in the history
  • Loading branch information
samsja committed Dec 6, 2024
1 parent 30a8a90 commit 36d1d35
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 13 deletions.
8 changes: 1 addition & 7 deletions configs/150M/3090.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,4 @@ reshard_after_forward = true
batch_size = 512
warmup_steps = 1000
total_steps = 88_000
lr = 4e-4


[data]
dataset_name_or_paths = "/data/datasets/fineweb-edu,/data/datasets/StackV1-popular"
dataset_ratio = "80:20"
num_workers = 2
lr = 4e-4
4 changes: 2 additions & 2 deletions configs/150M_short/3090.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ reshard_after_forward = true

[optim]
batch_size = 512
warmup_steps = 1000
total_steps = 15_000
warmup_steps = 500
total_steps = 8192
lr = 4e-4
5 changes: 3 additions & 2 deletions configs/150M_short/A40.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ type_model = "llama2"
[train]
micro_bs = 32 # change this based on the GPU
reshard_after_forward = true

[optim]
batch_size = 512
warmup_steps = 1000
total_steps = 15_000
warmup_steps = 500
total_steps = 8192
lr = 4e-4
5 changes: 3 additions & 2 deletions configs/150M_short/H100.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ type_model = "llama2"

[train]
micro_bs = 64 # change this based on the GPU
reshard_after_forward = true

[optim]
batch_size = 512
warmup_steps = 1000
total_steps = 15_000
warmup_steps = 500
total_steps = 8192
lr = 4e-4

0 comments on commit 36d1d35

Please sign in to comment.