We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6c1406c · commit d3e7fd8 · Copy full SHA for d3e7fd8
machine/jobs/settings.yaml
@@ -10,14 +10,15 @@ default:
  do_train: true
  optim: adamw_torch
  warmup_steps: 1000
- per_device_train_batch_size: 16
- gradient_accumulation_steps: 4
+ per_device_train_batch_size: 64
+ gradient_accumulation_steps: 1
  label_smoothing_factor: 0.2
  group_by_length: true
  gradient_checkpointing: true
  lr_scheduler_type: cosine
  learning_rate: 0.0002
  fp16: true
+ tf32: true
  save_strategy: no
  max_steps: 5000
  generate_params:
generate_params:
0 commit comments