From 5b2783e2527447f609ee1f474ddf7824ea07d283 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 30 Sep 2024 11:40:28 +0800 Subject: [PATCH] fix: fail when diloco is used with global pg size of 1 --- src/zeroband/diloco.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/zeroband/diloco.py b/src/zeroband/diloco.py index 46a61cbb..ab59fc80 100644 --- a/src/zeroband/diloco.py +++ b/src/zeroband/diloco.py @@ -58,6 +58,8 @@ def __init__( self._init_offloaded_optimizer(model=model) def _init_offloaded_optimizer(self, model): + if self.global_pg.size() < 2: + raise ValueError(f"Diloco requires at least two nodes to work. Got a world info of {self.world_info}") self.param_list_cpu = self.get_offloaded_param(model) self.outer_optimizer = torch.optim.SGD( self.param_list_cpu, lr=self.config.outer_lr, momentum=0.9, nesterov=True