Skip to content

Commit 5ca4f44

Browse files
feat(manager): add Kueue scheduling option for user workloads (#658)
Introduces Kueue as an alternative way to submit user jobs. Kueue will schedule Kubernetes jobs representing user runtime batch and job workloads. Co-authored-by: Xavier Tintin <[email protected]>. Closes reanahub/reana#800
1 parent d939f25 commit 5ca4f44

File tree

3 files changed

+21
-5
lines changed

3 files changed

+21
-5
lines changed

AUTHORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ The list of contributors in alphabetical order:
2525
- [Sinclert Perez](https://www.linkedin.com/in/sinclert)
2626
- [Tibor Simko](https://orcid.org/0000-0001-7202-5803)
2727
- [Vladyslav Moisieienkov](https://orcid.org/0000-0001-9717-0775)
28+
- [Xavier Tintin](https://orcid.org/0000-0002-3150-9112)

reana_workflow_controller/config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
SHARED_VOLUME_PATH,
1818
)
1919
from reana_db.models import JobStatus, RunStatus
20+
from distutils.util import strtobool
2021

2122
from reana_workflow_controller.version import __version__
2223

@@ -394,6 +395,12 @@ def _parse_interactive_sessions_environments(env_var):
394395
]
395396
"""Alive workflow statuses."""
396397

398+
KUEUE_ENABLED = bool(strtobool(os.getenv("KUEUE_ENABLED", "False")))
399+
"""Whether to use Kueue for workflow scheduling."""
400+
401+
KUEUE_LOCAL_QUEUE_NAME = "local-queue-batch"
402+
"""Name of the local queue to be used by Kueue."""
403+
397404
REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD = int(
398405
os.getenv("REANA_RUNTIME_BATCH_TERMINATION_GRACE_PERIOD", "120")
399406
)

reana_workflow_controller/workflow_run_manager.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,8 @@
9292
REANA_DASK_CLUSTER_DEFAULT_NUMBER_OF_WORKERS,
9393
REANA_DASK_CLUSTER_DEFAULT_SINGLE_WORKER_MEMORY,
9494
REANA_DASK_CLUSTER_DEFAULT_SINGLE_WORKER_THREADS,
95+
KUEUE_ENABLED,
96+
KUEUE_LOCAL_QUEUE_NAME,
9597
)
9698

9799

@@ -623,12 +625,17 @@ def _create_job_spec(
623625
self.workflow.workspace_path
624626
)
625627

628+
labels = {
629+
"reana_workflow_mode": "batch",
630+
"reana-run-batch-workflow-uuid": str(self.workflow.id_),
631+
}
632+
633+
if KUEUE_ENABLED:
634+
labels["kueue.x-k8s.io/queue-name"] = KUEUE_LOCAL_QUEUE_NAME
635+
626636
workflow_metadata = client.V1ObjectMeta(
627637
name=name,
628-
labels={
629-
"reana_workflow_mode": "batch",
630-
"reana-run-batch-workflow-uuid": str(self.workflow.id_),
631-
},
638+
labels=labels,
632639
namespace=REANA_RUNTIME_KUBERNETES_NAMESPACE,
633640
)
634641

@@ -727,6 +734,7 @@ def _create_job_spec(
727734
{"name": "USER", "value": user}, # Required by HTCondor
728735
{"name": "K8S_CERN_EOS_AVAILABLE", "value": K8S_CERN_EOS_AVAILABLE},
729736
{"name": "IMAGE_PULL_SECRETS", "value": ",".join(IMAGE_PULL_SECRETS)},
737+
{"name": "KUEUE_ENABLED", "value": str(KUEUE_ENABLED)},
730738
{
731739
"name": "REANA_SQLALCHEMY_DATABASE_URI",
732740
"value": SQLALCHEMY_DATABASE_URI,
@@ -835,7 +843,7 @@ def _create_job_spec(
835843
spec.template.spec.volumes.append(k8s_code_volume)
836844

837845
for container in spec.template.spec.containers:
838-
container.env.extend(current_app.config["DEBUG_ENV_VARS"])
846+
# container.env.extend(current_app.config["DEBUG_ENV_VARS"])
839847
sub_path = f"reana-{container.name}"
840848
if container.name == "workflow-engine":
841849
sub_path += f"-{self.workflow.type_}"

0 commit comments

Comments
 (0)