From 6deffa9d804f044945a84a0011c11727368c1a74 Mon Sep 17 00:00:00 2001
From: Shriya Palsamudram
Date: Tue, 3 Dec 2024 11:34:46 -0800
Subject: [PATCH] Update crash step in ft launcher test

Signed-off-by: Shriya Palsamudram
---
 .github/workflows/cicd-main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 1c1683cb3ce8..a8b0fa661676 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -3934,7 +3934,7 @@ jobs:
  export FAULT_TOL_FINISHED_FLAG_FILE="/tmp/llm_tests/llama_pretrain_results/sample_job_finished_flag"; \
  python tests/collections/llm/test_fault_nvrx.py \
  --devices=2 \
- --crash-step=4 \
+ --crash-step=16 \
  --experiment-dir=/tmp/llm_tests/llama_pretrain_results \
  --data-path=/home/TestData/nlp/megatron_llama/data/rp2_sample_sentencepiece_preproc_text_document \
  --tokenizer-path=/home/TestData/nlp/megatron_llama/tokenizer.model \