g_stack.sh
#!/bin/bash
# SLURM submit script: multi-node PRTS stacking pretraining (4 nodes x 8 GPUs each)
#SBATCH --job-name=prts
#SBATCH --partition=YourPartition
#SBATCH --nodes=4
#SBATCH --ntasks-per-node=8
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=28
#SBATCH --time=7-00:00:00
#SBATCH --output=prts.out
#SBATCH --error=prts.err
#SBATCH --exclusive
# Project root and run configuration
cd /path/to/your/prts
TIME=$(date +%Y-%m-%d-%H-%M-%S)
SCRIPT_DIR=/aifs4su/data/tongxuluo/PRTS
MODEL_NAME=24L2048H
METHOD=stacking
CONFIG="./prts_configs/stacking_6L_24L.json"
export WANDB_API_KEY=Your_WANDB_API_KEY  # replace with your Weights & Biases API key
source /home/tongxuluo/env/anaconda3/bin/activate
conda activate tinyllama
# Launch distributed pretraining across the allocated nodes
srun python pretrain/run_pretrain.py \
    --num_nodes=4 \
    --model_name=${MODEL_NAME} \
    --name=${MODEL_NAME} \
    --method=${METHOD} \
    --config_path=${CONFIG} \
    --out_dir=${SCRIPT_DIR}/${METHOD}/${TIME} \
    --train_data_dir=/path/to/your/slimpajama \
    --devices=8 \
    --global_batch_size=1024 \
    --learning_rate=3e-4 \
    --min_lr=3e-5 \
    --micro_batch_size=8 \
    --max_step=300000 \
    --warmup_steps=3000 \
    --log_step_interval=1 \
    --eval_iters=10000 \
    --save_step_interval=5000 \
    --eval_step_interval=5000 \
    --weight_decay=1e-1 \
    --beta1=0.9 \
    --beta2=0.95 \
    --grad_clip=1.0 \
    --decay_lr=True \
    --resume_id=5000
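
# Usage (a minimal sketch; assumes the placeholders above, i.e. partition,
# data paths, and WANDB_API_KEY, have been filled in for your cluster):
#   sbatch g_stack.sh     # submit the job to SLURM
#   squeue -u $USER       # check that the job is queued/running
#   tail -f prts.out      # follow training output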