diff --git a/teuthology/dispatcher/supervisor.py b/teuthology/dispatcher/supervisor.py index b89c39ac5a..23c7d18c11 100644 --- a/teuthology/dispatcher/supervisor.py +++ b/teuthology/dispatcher/supervisor.py @@ -225,7 +225,7 @@ def reimage(job_config): try: reimaged = lock_ops.reimage_machines(ctx, targets, job_config['machine_type']) except Exception as e: - log.exception('Reimaging error. Nuking machines...') + log.exception('Reimaging error. Unlocking machines...') unlock_targets(job_config) # Reimage failures should map to the 'dead' status instead of 'fail' report.try_push_job_info( diff --git a/teuthology/lock/ops.py b/teuthology/lock/ops.py index 4fb6ba86a5..289f909669 100644 --- a/teuthology/lock/ops.py +++ b/teuthology/lock/ops.py @@ -143,7 +143,12 @@ def lock_many(ctx, num, machine_type, user=None, description=None, update_nodes(ok_machs) return ok_machs elif reimage and machine_type in reimage_types: - return reimage_machines(ctx, machines, machine_type) + try: + return reimage_machines(ctx, machines, machine_type) + except Exception: + log.exception('Reimaging error. Unlocking machines...') + unlock_many(machines, user) + continue return machines elif response.status_code == 503: log.error('Insufficient nodes available to lock %d %s nodes.',