diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils.py b/checkpoint/orbax/checkpoint/checkpoint_utils.py
index 102a4ab7..69d437b7 100644
--- a/checkpoint/orbax/checkpoint/checkpoint_utils.py
+++ b/checkpoint/orbax/checkpoint/checkpoint_utils.py
@@ -76,7 +76,12 @@ def _unlock_checkpoint(
   """Removes a LOCKED directory to indicate unlocking."""
   if multihost.process_index() == 0:
     logging.info('Unlocking existing step: %d.', step)
-    step_dir = step_name_format.find_step(checkpoint_dir, step).path
+    try:
+      step_dir = step_name_format.find_step(checkpoint_dir, step).path
+    except ValueError as e:
+      # Checkpoint no longer exists, so there is nothing to unlock.
+      logging.warning('Did not find checkpoint: %s', e)
+      return
     utils.lockdir(step_dir).unlink(missing_ok=True)
 
 
diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py
index 6fbe3b58..63d01cd9 100644
--- a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py
+++ b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py
@@ -302,6 +302,12 @@ def test_unlock_existing(self):
     self.assertFalse(utils.is_locked(self.directory / str(0)))
     self.assertFalse(utils.is_locked(self.directory / str(1)))
 
+  def test_unlock_deleted(self):
+    # Checkpoint does not exist; `_unlock_checkpoint` returns without raising.
+    checkpoint_utils._unlock_checkpoint(
+        self.directory, step=0, step_name_format=step_lib.standard_name_format()
+    )
+
   @parameterized.parameters(
       (None, None),
       (None, 8),