
Commit

Merge branch 'saaketh/remove_deepspeed' of https://github.com/mosaicml/composer into saaketh/remove_deepspeed

merge type beat
snarayan21 committed Dec 3, 2024
2 parents 7478e46 + af223eb commit 8af7f07
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions composer/callbacks/checkpoint_saver.py
@@ -513,7 +513,7 @@ def _save_checkpoint(self, state: State, logger: Logger):
                     logger=logger,
                 )
             else:
-                remote_file_name = self.remote_file_name.format(state,).lstrip('/')
+                remote_file_name = self.remote_file_name.format(state).lstrip('/')
 
             log.debug(f'Uploading checkpoint to {remote_file_name}')
             try:
@@ -533,7 +533,7 @@ def _save_checkpoint(self, state: State, logger: Logger):
 
             # symlinks stay the same with sharded checkpointing
             if self.latest_remote_file_name is not None:
-                symlink_name = self.latest_remote_file_name.format(state,).lstrip('/') + '.symlink'
+                symlink_name = self.latest_remote_file_name.format(state).lstrip('/') + '.symlink'
 
                 # create and upload a symlink file
                 symlink_filename = os.path.join(
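The checkpoint_saver.py hunks only drop a redundant trailing comma from the two .format(state,) calls; in Python a trailing comma inside a call's argument list is ignored by the grammar, so behavior is unchanged. A minimal sketch of that equivalence, using a hypothetical format_path helper standing in for remote_file_name.format:

def format_path(state) -> str:
    # Hypothetical stand-in for the remote_file_name.format(state).lstrip('/') call above.
    return f'checkpoints/{state}'.lstrip('/')

# A trailing comma in the argument list is purely syntactic,
# so both calls are identical; the diff is a style cleanup only.
assert format_path('ep1-ba100',) == format_path('ep1-ba100')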
2 changes: 1 addition & 1 deletion composer/trainer/trainer.py
@@ -1233,7 +1233,7 @@ def __init__(
             **parallelism_config_args,
         ) if len(parallelism_config_args) > 0 else None
         if parallelism_config is not None or dist.get_world_size() > 1:
-            # FSDP requries torch.distributed to be initialized, even if the world size is 1
+            # FSDP requires torch.distributed to be initialized, even if the world size is 1
             # And torch.distributed is always required for multi-rank training
             dist.initialize_dist(device, dist_timeout)
         if parallelism_config is not None:
