Skip to content

Commit

Permalink
When unlocking a checkpoint interpret 'missing' as 'already unlocked'
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 651915877
  • Loading branch information
Orbax Authors committed Jul 13, 2024
1 parent a704408 commit 253583e
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 1 deletion.
7 changes: 6 additions & 1 deletion checkpoint/orbax/checkpoint/checkpoint_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,12 @@ def _unlock_checkpoint(
"""Removes a LOCKED directory to indicate unlocking."""
if multihost.process_index() == 0:
logging.info('Unlocking existing step: %d.', step)
- step_dir = step_name_format.find_step(checkpoint_dir, step).path
+ try:
+ step_dir = step_name_format.find_step(checkpoint_dir, step).path
+ except ValueError as e:
+ # Checkpoint no longer exists, so there is nothing to unlock.
+ logging.warning('Did not find checkpoint: %s', e)
+ return
utils.lockdir(step_dir).unlink(missing_ok=True)


Expand Down
6 changes: 6 additions & 0 deletions checkpoint/orbax/checkpoint/checkpoint_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,12 @@ def test_unlock_existing(self):
self.assertFalse(utils.is_locked(self.directory / str(0)))
self.assertFalse(utils.is_locked(self.directory / str(1)))

+ def test_unlock_deleted(self):
+ # Checkpoint does not exist; `_unlock_checkpoint` returns without raising.
+ checkpoint_utils._unlock_checkpoint(
+ self.directory, step=0, step_name_format=step_lib.standard_name_format()
+ )

@parameterized.parameters(
(None, None),
(None, 8),
Expand Down

0 comments on commit 253583e

Please sign in to comment.