@@ -800,9 +800,9 @@ def _validate_remote_device(self, remote_device, ds_config):
800
800
f'"nvme_path" in DeepSpeed Config cannot be None if remote device is { OffloadDeviceEnum .nvme } '
801
801
802
802
def _post_init_method (self , module ):
803
- #see_memory_usage(f"Before converting parmas in {module.__class__.__name__}", force=False)
803
+ #see_memory_usage(f"Before converting params in {module.__class__.__name__}", force=False)
804
804
print_rank_0 (f'Converting Params in { module .__class__ .__name__ } ' , force = False )
805
- see_memory_usage (f"Before converting and partitioning parmas in { module .__class__ .__name__ } " , force = False )
805
+ see_memory_usage (f"Before converting and partitioning params in { module .__class__ .__name__ } " , force = False )
806
806
807
807
global param_count
808
808
for name , param in module .named_parameters (recurse = False ):
@@ -825,7 +825,7 @@ def _post_init_method(self, module):
825
825
826
826
param .partition ()
827
827
see_memory_usage (
828
- f"Param count { param_count } . After converting and partitioning parmas in { module .__class__ .__name__ } " ,
828
+ f"Param count { param_count } . After converting and partitioning params in { module .__class__ .__name__ } " ,
829
829
force = False )
830
830
831
831
def _convert_to_deepspeed_param (self , param ):
@@ -1404,7 +1404,7 @@ def _reduce_scatter_gradients(self, param_list):
1404
1404
partition_size = param .ds_tensor .ds_numel
1405
1405
start = self .get_partition_rank () * partition_size
1406
1406
end = start + partition_size
1407
- #print_rank_0("REduce scatter was executed for praam {param.ds_id}")
1407
+ #print_rank_0(f"Reduce scatter was executed for param {param.ds_id}")
1408
1408
if start < param .ds_numel and end > param .ds_numel :
1409
1409
elements = param .ds_numel - start
1410
1410
param .grad .view (- 1 ).narrow (0 , start , elements ).copy_ (reduced_partition .narrow (0 , 0 , elements ))
0 commit comments