From 249c156b034de72686463484b8327583d5abfa7e Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 5 Apr 2024 21:59:22 -0700 Subject: [PATCH 1/4] add forward ports and submission ssh key --- .env.example | 2 ++ docker-compose.yml | 2 ++ src/frontend.py | 8 +++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 32cb42d..57c216c 100644 --- a/.env.example +++ b/.env.example @@ -23,3 +23,5 @@ MAX_TIME_CPU="1:00:00" PARTITIONS_GPU='["p_gpu1", "p_gpu2"]' RESERVATIONS_GPU='["r_gpu1", "r_gpu2"]' MAX_TIME_GPU="1:00:00" +SUBMISSION_SSH_KEY="~/.ssh/id_rsa" +FORWARD_PORTS='["8888:8888"]' diff --git a/docker-compose.yml b/docker-compose.yml index 85b7ae8..39b17a8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -25,6 +25,8 @@ services: PARTITIONS_GPU: "${PARTITIONS_GPU}" RESERVATIONS_GPU: "${RESERVATIONS_GPU}" MAX_TIME_GPU: "${MAX_TIME_GPU}" + SUBMISSION_SSH_KEY: "${SUBMISSION_SSH_KEY}" + FORWARD_PORTS: "${FORWARD_PORTS}" volumes: - $READ_DIR:/app/work/data - $WRITE_DIR:/app/work/mlex_store diff --git a/src/frontend.py b/src/frontend.py index cb1efb0..b14c5f8 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -52,6 +52,8 @@ PARTITIONS_GPU = os.getenv("PARTITIONS_CPU", []) RESERVATIONS_GPU = os.getenv("RESERVATIONS_CPU", []) MAX_TIME_GPU = os.getenv("MAX_TIME_CPU", "1:00:00") +SUBMISSION_SSH_KEY = os.getenv("SUBMISSION_SSH_KEY", "") +FORWARD_PORTS = os.getenv("FORWARD_PORTS", "") # Mlex content api CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") @@ -99,6 +101,8 @@ "reservations": RESERVATIONS_CPU, "max_time": MAX_TIME_CPU, "conda_env_name": "mlex_dimension_reduction_pca", + "submission_ssh_key": SUBMISSION_SSH_KEY, + "forward_ports": FORWARD_PORTS, "params": { "io_parameters": {"uid_save": "uid0001", "uid_retrieve": ""} }, @@ -370,7 +374,7 @@ def submit_dimension_reduction_job( "params": auto_params, "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", } - else: + 
else: # slurm autoencoder_params = { "job_name": "latent_space_explorer", "num_nodes": 1, @@ -378,6 +382,8 @@ def submit_dimension_reduction_job( "reservations": RESERVATIONS_GPU, "max_time": MAX_TIME_GPU, "conda_env_name": "pytorch_autoencoders", + "submission_ssh_key": SUBMISSION_SSH_KEY, + "forward_ports": FORWARD_PORTS, "params": auto_params, } job_params["params_list"].insert(0, autoencoder_params) From 7a560ff1b509e4f670541ae92bad43bb8048b88f Mon Sep 17 00:00:00 2001 From: taxe10 Date: Fri, 5 Apr 2024 22:30:41 -0700 Subject: [PATCH 2/4] load lists from env variables --- src/frontend.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index b14c5f8..e7448db 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -46,14 +46,14 @@ FLOW_TYPE = os.getenv("FLOW_TYPE", "podman") # Slurm -PARTITIONS_CPU = os.getenv("PARTITIONS_CPU", []) -RESERVATIONS_CPU = os.getenv("RESERVATIONS_CPU", []) +PARTITIONS_CPU = json.loads(os.getenv("PARTITIONS_CPU", "[]")) +RESERVATIONS_CPU = json.loads(os.getenv("RESERVATIONS_CPU", "[]")) MAX_TIME_CPU = os.getenv("MAX_TIME_CPU", "1:00:00") -PARTITIONS_GPU = os.getenv("PARTITIONS_CPU", []) -RESERVATIONS_GPU = os.getenv("RESERVATIONS_CPU", []) +PARTITIONS_GPU = json.loads(os.getenv("PARTITIONS_GPU", "[]")) +RESERVATIONS_GPU = json.loads(os.getenv("RESERVATIONS_GPU", "[]")) MAX_TIME_GPU = os.getenv("MAX_TIME_CPU", "1:00:00") SUBMISSION_SSH_KEY = os.getenv("SUBMISSION_SSH_KEY", "") -FORWARD_PORTS = os.getenv("FORWARD_PORTS", "") +FORWARD_PORTS = json.loads(os.getenv("FORWARD_PORTS", "[]")) # Mlex content api CONTENT_API_URL = os.getenv("CONTENT_API_URL", "http://localhost:8000/api/v0/models") From 258bdf56481603642e8b737e7caad03d24360f97 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sat, 6 Apr 2024 00:22:12 -0700 Subject: [PATCH 3/4] add missing filename in slurm jobs for autoencoder --- src/frontend.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/frontend.py b/src/frontend.py index 
e7448db..fe76458 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -382,6 +382,7 @@ def submit_dimension_reduction_job( "reservations": RESERVATIONS_GPU, "max_time": MAX_TIME_GPU, "conda_env_name": "pytorch_autoencoders", + "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", "submission_ssh_key": SUBMISSION_SSH_KEY, "forward_ports": FORWARD_PORTS, "params": auto_params, From 5898c70481568bc43f5af990be37a63add658d20 Mon Sep 17 00:00:00 2001 From: taxe10 Date: Sat, 6 Apr 2024 00:29:17 -0700 Subject: [PATCH 4/4] updating conda env name --- src/frontend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/frontend.py b/src/frontend.py index fe76458..bc84a70 100755 --- a/src/frontend.py +++ b/src/frontend.py @@ -370,7 +370,7 @@ def submit_dimension_reduction_job( } elif FLOW_TYPE == "conda": autoencoder_params = { - "conda_env_name": "pytorch_autoencoders", + "conda_env_name": "mlex_pytorch_autoencoders", "params": auto_params, "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", } @@ -381,7 +381,7 @@ def submit_dimension_reduction_job( "partitions": PARTITIONS_GPU, "reservations": RESERVATIONS_GPU, "max_time": MAX_TIME_GPU, - "conda_env_name": "pytorch_autoencoders", + "conda_env_name": "mlex_pytorch_autoencoders", "python_file_name": "mlex_pytorch_autoencoders/src/predict_model.py", "submission_ssh_key": SUBMISSION_SSH_KEY, "forward_ports": FORWARD_PORTS,