Skip to content

Commit

Permalink
fix ckpt
Browse files Browse the repository at this point in the history
  • Loading branch information
samsja committed Sep 29, 2024
1 parent 87be491 commit e239c27
Showing 1 changed file with 1 addition and 4 deletions.
5 changes: 1 addition & 4 deletions src/zeroband/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ def train(config: Config):
perf_counter = PerfCounter(window_size=10)

logger.info("starting training")
d = 0
while True:
if num_inner_steps > 1:
# if we don't use diloco we don't print the outer step logs
Expand Down Expand Up @@ -291,9 +290,7 @@ def train(config: Config):
and training_progress.step % config.ckpt.interval == 0
):
# we only allow checkpointing after an outer step. For non-diloco training outer step = 1 anyway
if d == 0:
ckpt_manager.save(config.ckpt.path, config.ckpt.remote_path)
d += 1
ckpt_manager.save(config.ckpt.path, config.ckpt.remote_path)

if training_progress.step >= config.optim.total_steps:
# we only allow breaking outside of the inner loop.
Expand Down

0 comments on commit e239c27

Please sign in to comment.