diff --git a/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh b/tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh.disabled similarity index 100% rename from tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh rename to tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh.disabled diff --git a/tests/test_suites/nightly.txt b/tests/test_suites/nightly.txt index ea353d1131..63588bc036 100644 --- a/tests/test_suites/nightly.txt +++ b/tests/test_suites/nightly.txt @@ -65,7 +65,8 @@ tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp2.sh tests/test_suites/llm/sft-llama3.1-8b-1n8g-fsdp2tp1-dynamicbatch.sh # Functional 32b test -tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh +# TODO(ahmadki): sequence parallel + tp_size >1 is currently broken in torch==2.8.0, disabled until https://github.com/NVIDIA-NeMo/Automodel/issues/652 is fixed +# tests/test_suites/llm/sft-qwen2.5-32b-4n8g-fsdp2tp8sp-actckpt.v3.sh # Megatron tests/test_suites/llm/sft-llama3.1-8b-1n8g-megatron.sh