From 884d25a43599b08d9672f88c6befce3bae80e87f Mon Sep 17 00:00:00 2001 From: Orbax Authors Date: Fri, 12 Jul 2024 16:22:43 -0700 Subject: [PATCH] When unlocking a checkpoint interpret 'missing' as 'already unlocked' PiperOrigin-RevId: 651915877 --- checkpoint/orbax/checkpoint/checkpoint_utils.py | 7 ++++++- checkpoint/orbax/checkpoint/checkpoint_utils_test.py | 6 ++++++ checkpoint/orbax/checkpoint/path/step.py | 2 +- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils.py b/checkpoint/orbax/checkpoint/checkpoint_utils.py index 102a4ab75..a5eaff840 100644 --- a/checkpoint/orbax/checkpoint/checkpoint_utils.py +++ b/checkpoint/orbax/checkpoint/checkpoint_utils.py @@ -76,7 +76,12 @@ def _unlock_checkpoint( """Removes a LOCKED directory to indicate unlocking.""" if multihost.process_index() == 0: logging.info('Unlocking existing step: %d.', step) - step_dir = step_name_format.find_step(checkpoint_dir, step).path + try: + step_dir = step_name_format.find_step(checkpoint_dir, step).path + except FileNotFoundError as e: + # Checkpoint no longer exists, so there is nothing to unlock. + logging.warning('Did not find checkpoint: %s', e) + return utils.lockdir(step_dir).unlink(missing_ok=True) diff --git a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py index 6fbe3b58c..63d01cd96 100644 --- a/checkpoint/orbax/checkpoint/checkpoint_utils_test.py +++ b/checkpoint/orbax/checkpoint/checkpoint_utils_test.py @@ -302,6 +302,12 @@ def test_unlock_existing(self): self.assertFalse(utils.is_locked(self.directory / str(0))) self.assertFalse(utils.is_locked(self.directory / str(1))) + def test_unlock_deleted(self): + # Checkpoint does not exist; `_unlock_checkpoint` returns without raising. + checkpoint_utils._unlock_checkpoint( + self.directory, step=0, step_name_format=step_lib.standard_name_format() + ) + @parameterized.parameters( (None, None), (None, 8), diff --git a/checkpoint/orbax/checkpoint/path/step.py b/checkpoint/orbax/checkpoint/path/step.py index 5fecef8f3..47c93af7e 100644 --- a/checkpoint/orbax/checkpoint/path/step.py +++ b/checkpoint/orbax/checkpoint/path/step.py @@ -335,7 +335,7 @@ def find_step(self, base_path: epath.PathLike, step: int) -> Metadata: return metadata # Raise detailed error message. - raise ValueError( + raise FileNotFoundError( f'No step path found with name={self.build_name(step)},' f' NameFormat={self} for step={step} under {base_path}.' )