Skip to content

Commit

Permalink
move ask for live reco
Browse files (browse the repository at this point in the history)
  • Loading branch information
Jackmin801 authored and samsja committed Nov 13, 2024
1 parent f41230c commit 14e0501
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions src/zeroband/comms.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -77,6 +77,11 @@ def __init__(

# Logging
self._optimize_ring_ranks()

if self.live_recovery_rank_src is not None:
self.live_recovery.ask_for_live_ckpt(self.live_recovery_rank_src)
self.global_pg.barrier().wait()

self._logger.info(f"global_pg size : {self.global_pg.size()}, local_pg size: {self.local_pg.size()}")

def __del__(self):
Expand Down Expand Up @@ -263,9 +268,6 @@ def _init_global_pg(self) -> None:
self.global_status = "running"
self._last_resolved_time = self.global_store.get("resolved_time").decode("utf-8")

if self.live_recovery_rank_src is not None:
self.live_recovery.ask_for_live_ckpt(self.live_recovery_rank_src)

self._start_heartbeat()

self._logger.info(
Expand Down

0 comments on commit 14e0501

Please sign in to comment.