From 27f4bea6a3c6a2eb214bff55f4da76251223c966 Mon Sep 17 00:00:00 2001 From: NoelBird Date: Thu, 4 Apr 2024 00:32:52 +0900 Subject: [PATCH 1/3] Remove duplicated code --- megatron/core/transformer/transformer_config.py | 1 - 1 file changed, 1 deletion(-) diff --git a/megatron/core/transformer/transformer_config.py b/megatron/core/transformer/transformer_config.py index 34b08910d9..a6d42c9418 100644 --- a/megatron/core/transformer/transformer_config.py +++ b/megatron/core/transformer/transformer_config.py @@ -150,7 +150,6 @@ class TransformerConfig(ModelParallelConfig): # activation recomputation #################### recompute_granularity: str = None - recompute_granularity: str = None """Determines which type of activation recompute to use. Megatron-core supports 'selective' activation checkpointing where only the memory intensive part of attention is checkpointed. These memory intensive activations are also less compute intensive which makes activation From c528dbf7fcd4cd2e872d5f38ca11123ea8e95f09 Mon Sep 17 00:00:00 2001 From: NoelBird Date: Thu, 4 Apr 2024 01:23:06 +0900 Subject: [PATCH 2/3] Fix typo --- megatron/training/training.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index eaaf9bde24..181ac46e5b 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -161,7 +161,7 @@ def pretrain(train_valid_test_dataset_provider, 1) initialize Megatron. 2) setup model, optimizer and lr schedule using the model_provider. 3) call train_val_test_data_provider to get train/val/test datasets. - 4) train the modle using the forward_step_func. + 4) train the model using the forward_step_func. 
Args: train_valid_test_dataset_provider: a function that takes the size of From 152c4c1ba1f606333d057ba0c2f9e6676146a897 Mon Sep 17 00:00:00 2001 From: NoelBird Date: Thu, 4 Apr 2024 01:33:31 +0900 Subject: [PATCH 3/3] Remove duplicated line --- megatron/training/training.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/megatron/training/training.py b/megatron/training/training.py index 181ac46e5b..1c1a214c97 100644 --- a/megatron/training/training.py +++ b/megatron/training/training.py @@ -211,9 +211,6 @@ def pretrain(train_valid_test_dataset_provider, time.time() - _TRAIN_START_TIME)) print_datetime('after megatron is initialized') - args = get_args() - timers = get_timers() - one_logger = get_one_logger() if one_logger: one_logger.log_metrics({