From 82f376c90d079775739b07b1ba5d88185231ee51 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Thu, 16 Jan 2025 09:53:49 -0800 Subject: [PATCH 1/4] fix nemo 1 packed sequence TE version error Signed-off-by: Chen Cui --- .../nlp/models/language_modeling/megatron_gpt_model.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index caa909dc7ead..19933720dc83 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -102,6 +102,7 @@ drain_embedding_wgrad_compute, get_model_config, init_method_normal, + is_te_min_version, scaled_init_method_normal, ) @@ -1366,7 +1367,9 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ 'attention_mask': None if self.get_attention_mask_from_fusion else batch['attention_mask'], 'labels': batch['labels'] if 'labels' in batch else None, } - + if not is_te_min_version("1.13", check_equality=False): + # cu_seqlens_unpadded != cu_seqlens is not supported in 1.13 or earlier + cu_seqlens_unpadded = cu_seqlens forward_args['packed_seq_params'] = PackedSeqParams( cu_seqlens_q=cu_seqlens_unpadded, cu_seqlens_kv=cu_seqlens_unpadded, From 2178ea16b9f73c65083e97315c456fa052e871e1 Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Fri, 17 Jan 2025 09:36:04 -0800 Subject: [PATCH 2/4] limit condition to only CP=1 case Signed-off-by: Chen Cui --- .../nlp/models/language_modeling/megatron_gpt_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 19933720dc83..aeab84c43b7c 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ 
b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1367,8 +1367,8 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ 'attention_mask': None if self.get_attention_mask_from_fusion else batch['attention_mask'], 'labels': batch['labels'] if 'labels' in batch else None, } - if not is_te_min_version("1.13", check_equality=False): - # cu_seqlens_unpadded != cu_seqlens is not supported in 1.13 or earlier + elif not is_te_min_version("1.13", check_equality=False): + # cu_seqlens_unpadded != cu_seqlens is not supported in 1.13 or earlier when CP=1 cu_seqlens_unpadded = cu_seqlens forward_args['packed_seq_params'] = PackedSeqParams( cu_seqlens_q=cu_seqlens_unpadded, From 6e41169bb74b7c2578934823558c5d69a9d4d8ab Mon Sep 17 00:00:00 2001 From: Chen Cui Date: Fri, 17 Jan 2025 14:38:24 -0800 Subject: [PATCH 3/4] add cudnn version guard Signed-off-by: Chen Cui --- .../models/language_modeling/megatron_gpt_model.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index aeab84c43b7c..630da0467ff3 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1367,9 +1367,16 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ 'attention_mask': None if self.get_attention_mask_from_fusion else batch['attention_mask'], 'labels': batch['labels'] if 'labels' in batch else None, } - elif not is_te_min_version("1.13", check_equality=False): - # cu_seqlens_unpadded != cu_seqlens is not supported in 1.13 or earlier when CP=1 - cu_seqlens_unpadded = cu_seqlens + else: + from packaging.version import Version as PkgVersion + if ( + self.transformer_config.num_query_groups != self.transformer_config.num_attention_heads and + not 
is_te_min_version("1.13", check_equality=False) and + PkgVersion(os.getenv("CUDNN_VERSION", "9.5")) < PkgVersion("9.6") + ): + # cu_seqlens_unpadded != cu_seqlens when CP=1 is not supported in TE 1.13 or earlier + # and in CUDNN 9.5 or earlier when using GQA. + cu_seqlens_unpadded = cu_seqlens forward_args['packed_seq_params'] = PackedSeqParams( cu_seqlens_q=cu_seqlens_unpadded, cu_seqlens_kv=cu_seqlens_unpadded, From 1bd68adf97e0f4b6feca8ac85c490db4689a9059 Mon Sep 17 00:00:00 2001 From: cuichenx Date: Fri, 17 Jan 2025 22:40:57 +0000 Subject: [PATCH 4/4] Apply isort and black reformatting Signed-off-by: cuichenx --- .../nlp/models/language_modeling/megatron_gpt_model.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 630da0467ff3..411d9c6ee39d 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1369,10 +1369,11 @@ def fwd_output_and_loss_func(dataloader_iter, model, checkpoint_activations_all_ } else: from packaging.version import Version as PkgVersion + if ( - self.transformer_config.num_query_groups != self.transformer_config.num_attention_heads and - not is_te_min_version("1.13", check_equality=False) and - PkgVersion(os.getenv("CUDNN_VERSION", "9.5")) < PkgVersion("9.6") + self.transformer_config.num_query_groups != self.transformer_config.num_attention_heads + and not is_te_min_version("1.13", check_equality=False) + and PkgVersion(os.getenv("CUDNN_VERSION", "9.5")) < PkgVersion("9.6") ): # cu_seqlens_unpadded != cu_seqlens when CP=1 is not supported in TE 1.13 or earlier # and in CUDNN 9.5 or earlier when using GQA.