Skip to content

Commit

Permalink
Consider errors from htcondor scheduler
Browse files (browse the repository at this point in the history)
  • Loading branch information
fraimondo committed Oct 28, 2024
1 parent 1bd253c commit 0870a01
Showing 1 changed file with 32 additions and 14 deletions.
46 changes: 32 additions & 14 deletions joblib_htcondor/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -903,23 +903,44 @@ def _watcher(self) -> None:
logger.log(
level=9, msg=f"Submitting job {to_submit}"
)
to_submit.htcondor_submit_result = (
self._client.submit(
to_submit.htcondor_submit,
count=1,
try:
to_submit.htcondor_submit_result = (
self._client.submit(
to_submit.htcondor_submit,
count=1,
)
)
)
except OSError as e:
# Something went wrong, continue and submit
# this later
logger.error(f"Error submitting job: {e}")
logger.error(traceback.format_exc())
logger.error("Will try later.")

# Put the job back in the queue
self._queued_jobs_list.appendleft(to_submit)

# Wait a bit before trying again
time.sleep(1)
continue

logger.log(level=9, msg="Getting cluster id.")
# Set the cluster id
to_submit.cluster_id = ( # type: ignore
to_submit.htcondor_submit_result.cluster()
)
logger.log(level=9, msg="Job submitted.")
# Update the sent timestamp and cluster id
logger.log(
level=9, msg="Updating task status timestamp."
)
# Move to waiting jobs
self._waiting_jobs_deque.append(to_submit)
newly_queued += 1
update_meta = True

if self._export_metadata:
# Update the sent timestamp and cluster id
logger.log(
level=9,
msg="Updating task status timestamp.",
)
self._backend_meta.task_status[ # type: ignore
to_submit.task_id - 1
].sent_timestamp = datetime.now()
Expand All @@ -932,11 +953,8 @@ def _watcher(self) -> None:
to_submit.task_id - 1
].cluster_id = to_submit.cluster_id

logger.log(level=9, msg="Task status updated")
# Move to waiting jobs
self._waiting_jobs_deque.append(to_submit)
newly_queued += 1
update_meta = True
logger.log(level=9, msg="Task status updated")

if update_meta and self._export_metadata:
self.write_metadata()
# logger.debug("Waiting 0.1 seconds")
Expand Down

0 comments on commit 0870a01

Please sign in to comment.