diff --git a/scripts/simulate_multi_node_diloco.sh b/scripts/simulate_multi_node_diloco.sh
index 4a69912a..5d026d73 100755
--- a/scripts/simulate_multi_node_diloco.sh
+++ b/scripts/simulate_multi_node_diloco.sh
@@ -59,7 +59,7 @@ export BASE_PORT=${BASE_PORT:-10001}
 
 for i in $(seq 0 $(($N - 1 )))
 do
-    > logs/log$i
+    > logs/log$i.log
     GLOBAL_UNIQUE_ID=$i GLOBAL_RANK=$i CUDA_VISIBLE_DEVICES=$(get_cuda_devices $NUM_GPU $i) uv run torchrun --nproc_per_node=$NUM_GPU --node-rank 0 --rdzv-endpoint localhost:$((BASE_PORT + $i)) --nnodes=1 $@ > logs/log$i.log 2>&1 &
     child_pids+=($!)
 done