Skip to content

Commit

Permalink
fix: fail when DiLoCo is used with a global process group (pg) of size 1
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jackmin801 committed Sep 30, 2024
1 parent cdb5703 commit 5b2783e
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions src/zeroband/diloco.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ def __init__(
self._init_offloaded_optimizer(model=model)

def _init_offloaded_optimizer(self, model):
if self.global_pg.size() < 2:
raise ValueError(f"Diloco requires at least two nodes to work. Got a world info of {self.world_info}")
self.param_list_cpu = self.get_offloaded_param(model)
self.outer_optimizer = torch.optim.SGD(
self.param_list_cpu, lr=self.config.outer_lr, momentum=0.9, nesterov=True
Expand Down

0 comments on commit 5b2783e

Please sign in to comment.