From e6b48fa70f64215ff5486b80c3294cdb5e653625 Mon Sep 17 00:00:00 2001 From: Orbax Authors Date: Fri, 12 Jul 2024 16:22:43 -0700 Subject: [PATCH] When unlocking a checkpoint interpret 'missing' as 'already unlocked' PiperOrigin-RevId: 651915877 --- checkpoint/orbax/checkpoint/checkpoint_utils.py | 7 ++++++- checkpoint/orbax/checkpoint/checkpoint_utils_test.py | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils.py b/checkpoint/orbax/checkpoint/checkpoint_utils.py index 102a4ab75..69d437b71 100644 --- a/checkpoint/orbax/checkpoint/checkpoint_utils.py +++ b/checkpoint/orbax/checkpoint/checkpoint_utils.py @@ -76,7 +76,12 @@ def _unlock_checkpoint( """Removes a LOCKED directory to indicate unlocking.""" if multihost.process_index() == 0: logging.info('Unlocking existing step: %d.', step) - step_dir = step_name_format.find_step(checkpoint_dir, step).path + try: + step_dir = step_name_format.find_step(checkpoint_dir, step).path + except ValueError as e: + # Checkpoint no longer exists, so there is nothing to unlock. + logging.warning('Did not find checkpoint: %s', e) + return utils.lockdir(step_dir).unlink(missing_ok=True) diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py index 6fbe3b58c..63d01cd96 100644 --- a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py +++ b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py @@ -302,6 +302,12 @@ def test_unlock_existing(self): self.assertFalse(utils.is_locked(self.directory / str(0))) self.assertFalse(utils.is_locked(self.directory / str(1))) + def test_unlock_deleted(self): + # Checkpoint does not exist; `_unlock_checkpoint` returns without raising. + checkpoint_utils._unlock_checkpoint( + self.directory, step=0, step_name_format=step_lib.standard_name_format() + ) + @parameterized.parameters( (None, None), (None, 8),