From 557613f13cd463bb23b1c83c5808366f3bf144ee Mon Sep 17 00:00:00 2001 From: Orbax Authors Date: Wed, 3 Jul 2024 03:37:49 -0700 Subject: [PATCH] Fix possible race condition in orbax root directory creation. PiperOrigin-RevId: 649014114 --- checkpoint/orbax/checkpoint/checkpoint_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/checkpoint/orbax/checkpoint/checkpoint_manager.py b/checkpoint/orbax/checkpoint/checkpoint_manager.py index d2e510f0..d2705826 100644 --- a/checkpoint/orbax/checkpoint/checkpoint_manager.py +++ b/checkpoint/orbax/checkpoint/checkpoint_manager.py @@ -364,7 +364,9 @@ def _create_root_directory( if not directory.exists() and utils.is_primary_host( multiprocessing_options.primary_host ): - directory.mkdir(parents=True) + # We need exist_ok=True because the directory might have been created due + # to a race condition. + directory.mkdir(parents=True, exist_ok=True) logging.info('Created directory=%s', directory) multihost.sync_global_processes( multihost.unique_barrier_key(