From 53dee5bad652860f4f0a32e7e019a4b78b96f6e1 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 15:13:30 -0400 Subject: [PATCH 01/22] first Mop WDL commit --- TAG_Mop/TAG_Mop.inputs.json | 9 ++ TAG_Mop/TAG_Mop.wdl | 182 ++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 TAG_Mop/TAG_Mop.inputs.json create mode 100644 TAG_Mop/TAG_Mop.wdl diff --git a/TAG_Mop/TAG_Mop.inputs.json b/TAG_Mop/TAG_Mop.inputs.json new file mode 100644 index 0000000..80bb9ac --- /dev/null +++ b/TAG_Mop/TAG_Mop.inputs.json @@ -0,0 +1,9 @@ +{ + "TAG_Mop.runMop": "Boolean", + "TAG_Mop.workspaceName": "String", + "TAG_Mop.removeFailedSubmissions": "Boolean", + "TAG_Mop.namespace": "String (optional, default = \"broadtagteam\")", + "TAG_Mop.mopDocker": "String (optional, default = \"us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0\")", + "TAG_Mop.remove_partially_fail": "Boolean (optional, default = false)" +} + diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl new file mode 100644 index 0000000..b497d2c --- /dev/null +++ b/TAG_Mop/TAG_Mop.wdl @@ -0,0 +1,182 @@ +version 1.0 + + workflow TAG_Mop{ + input{ + String namespace = "broadtagteam" + String workspaceName + String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0" + Boolean removeFailedSubmissions + Boolean runMop + Boolean remove_partially_fail = false + } + call rmSysfiles { + input: + namespace = namespace, + workspaceName = workspaceName, + mopDocker = mopDocker + } + + if (removeFailedSubmissions){ + call GetFailedSubmissions { + input: + namespace = namespace, + workspaceName = workspaceName, + mopDocker = mopDocker, + remove_partially_fail = removeFailedSubmissions + } + scatter (sid in GetFailedSubmissions.failed_submissions) { + call CleanupAFolder { + input: + bucket_name = GetFailedSubmissions.workspace_bucket, + submission_id = sid + } + } + } + + if (runMop){ + call mop { + input: + workspaceName = workspaceName, + mopDocker = mopDocker + } + } + + meta { 
+ author: "Yueyao Gao" + email: "gaoyueya@broadinstitute.org" + description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." + } + + } + + task rmSysfiles { + input{ + String namespace + String workspaceName + String mopDocker + } + command <<< + source activate NeoVax-Input-Parser + python <>> + output{ + Int deleted_sys_files = read_int("num_of_sys_files_to_delete.txt") + } + runtime { + docker: mopDocker + } + } + + task GetFailedSubmissions { + input { + String namespace + String workspaceName + Boolean remove_partially_fail + String mopDocker + } + command <<< + source activate NeoVax-Input-Parser + python3 <>> + runtime { + docker: mopDocker + } + output { + Array[String] failed_submissions = read_lines("failed_submissions.txt") + String workspace_bucket = read_string("workspace_bucket.txt") + + } + } + +task CleanupAFolder { + input { + String bucket_name + String submission_id + } + + command <<< + timeout 23h gsutil -q rm -rf gs://~{bucket_name}/submissions/~{submission_id} || echo "Timed out. Please try again." 
+ >>> + + runtime { + cpu: 1 + memory: "4 GiB" + disks: "local-disk 10 HDD" + preemptible_tries: 1 + max_retries: 1 + docker:"us.gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine" + } +} + + + task mop { + input{ + String workspaceName + String mopDocker + } + command <<< + source activate NeoVax-Input-Parser + python <>> + runtime { + docker: mopDocker + } + } From b4c2b034b39c2d27d283f6d106f30b9b806d55fb Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 15:17:52 -0400 Subject: [PATCH 02/22] added mop to dockstore --- .dockstore.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.dockstore.yml b/.dockstore.yml index 74db3c3..d050801 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -118,4 +118,9 @@ workflows: subclass: WDL primaryDescriptorPath: /CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl testParameterFiles: - - /CleanupFailedSubmissions/Cleanup_Failed_Submissions.inputs.json \ No newline at end of file + - /CleanupFailedSubmissions/Cleanup_Failed_Submissions.inputs.json + - name: TAG_Mop + subclass: WDL + primaryDescriptorPath: /TAG_Mop/TAG_Mop.wdl + testParameterFiles: + - /TAG_Mop/TAG_Mop.inputs.json \ No newline at end of file From c23058a7f8d0a5262facbbf3860a4c394157d73f Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 15:25:22 -0400 Subject: [PATCH 03/22] report number of sys files to remove in the output --- TAG_Mop/TAG_Mop.wdl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index b497d2c..2d33b7a 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -41,6 +41,11 @@ version 1.0 } } + + output{ + Int deleted_sys_files = rmSysfiles.deleted_sys_files + } + meta { author: "Yueyao Gao" email: "gaoyueya@broadinstitute.org" From bc3a2b4bf40426c0a2ca237adf33f24b7d66f48c Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 15:38:20 -0400 Subject: [PATCH 04/22] Let the tasks to be executed in order --- TAG_Mop/TAG_Mop.wdl | 
65 ++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 2d33b7a..bdf1b61 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -1,22 +1,25 @@ version 1.0 - workflow TAG_Mop{ - input{ - String namespace = "broadtagteam" - String workspaceName - String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0" - Boolean removeFailedSubmissions - Boolean runMop - Boolean remove_partially_fail = false - } - call rmSysfiles { - input: - namespace = namespace, - workspaceName = workspaceName, - mopDocker = mopDocker - } +workflow TAG_Mop { + input { + String namespace = "broadtagteam" + String workspaceName + String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0" + Boolean removeFailedSubmissions + Boolean runMop + Boolean remove_partially_fail = false + } - if (removeFailedSubmissions){ + scatter (task_index in range(3)) { + if (task_index == 0) { + call rmSysfiles { + input: + namespace = namespace, + workspaceName = workspaceName, + mopDocker = mopDocker + } + } + if (removeFailedSubmissions && task_index == 1) { call GetFailedSubmissions { input: namespace = namespace, @@ -30,29 +33,27 @@ version 1.0 bucket_name = GetFailedSubmissions.workspace_bucket, submission_id = sid } - } + } } - - if (runMop){ + if (runMop && task_index == 2) { call mop { input: workspaceName = workspaceName, mopDocker = mopDocker } } + } + output { + Int deleted_sys_files = rmSysfiles.deleted_sys_files + } - output{ - Int deleted_sys_files = rmSysfiles.deleted_sys_files - } - - meta { - author: "Yueyao Gao" - email: "gaoyueya@broadinstitute.org" - description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." 
- } - + meta { + author: "Yueyao Gao" + email: "gaoyueya@broadinstitute.org" + description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." } +} task rmSysfiles { input{ @@ -82,7 +83,7 @@ version 1.0 # Output the number of system files to delete with open('num_of_sys_files_to_delete.txt', 'w') as f: - f.write(len(sys_files_to_delete)) + f.write(str(len(sys_files_to_delete))) print(f"System Files to Delete in {namespace}/{workspaceName}: ", len(sys_files_to_delete)) for pattern in set([i.split('/')[-1] for i in sys_files_to_delete], desc="Deleting System Files", unit="pattern"): @@ -96,6 +97,8 @@ version 1.0 } runtime { docker: mopDocker + memory: "32 GiB" + cpu: 8 } } @@ -183,5 +186,7 @@ task CleanupAFolder { >>> runtime { docker: mopDocker + memory: "32 GiB" + cpu: 8 } } From 503997a6d49e2d6fab4b9c937205105ebbe0a91a Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 15:40:46 -0400 Subject: [PATCH 05/22] updated TAG mop command --- TAG_Mop/TAG_Mop.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index bdf1b61..c481877 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -168,6 +168,7 @@ task CleanupAFolder { task mop { input{ + String namespace String workspaceName String mopDocker } @@ -175,11 +176,13 @@ task CleanupAFolder { source activate NeoVax-Input-Parser python < Date: Sat, 13 Apr 2024 15:52:15 -0400 Subject: [PATCH 06/22] output mop sys files --- TAG_Mop/TAG_Mop.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index c481877..c9613d3 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -38,6 +38,7 @@ workflow TAG_Mop { if (runMop && task_index == 2) { call mop { input: + namespace = namespace, workspaceName = workspaceName, mopDocker = 
mopDocker } @@ -45,7 +46,7 @@ workflow TAG_Mop { } output { - Int deleted_sys_files = rmSysfiles.deleted_sys_files + Array[Int?] deleted_sys_files = rmSysfiles.deleted_sys_files } meta { From a2819a7b7607c6b12ce4dac247d56342764ee6bb Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 16:33:06 -0400 Subject: [PATCH 07/22] separate failed removal and mop WDL --- TAG_Mop/TAG_Mop.wdl | 137 ++++++++++---------------------------------- 1 file changed, 30 insertions(+), 107 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index c9613d3..1c0ec64 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -1,60 +1,43 @@ version 1.0 -workflow TAG_Mop { - input { - String namespace = "broadtagteam" - String workspaceName - String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0" - Boolean removeFailedSubmissions - Boolean runMop - Boolean remove_partially_fail = false - } +workflow TAG_Mop{ + input{ + String namespace = "broadtagteam" + String workspaceName + String mopDocker = "us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0" + Boolean runMop + } - scatter (task_index in range(3)) { - if (task_index == 0) { - call rmSysfiles { - input: - namespace = namespace, - workspaceName = workspaceName, - mopDocker = mopDocker - } + call rmSysfiles { + input: + namespace = namespace, + workspaceName = workspaceName, + mopDocker = mopDocker } - if (removeFailedSubmissions && task_index == 1) { - call GetFailedSubmissions { + + if (runMop){ + call mop { input: namespace = namespace, workspaceName = workspaceName, mopDocker = mopDocker, - remove_partially_fail = removeFailedSubmissions - } - scatter (sid in GetFailedSubmissions.failed_submissions) { - call CleanupAFolder { - input: - bucket_name = GetFailedSubmissions.workspace_bucket, - submission_id = sid - } + sysfiles = rmSysfiles.deleted_sys_files + } } - if (runMop && task_index == 2) { - call mop { - input: - namespace = namespace, - workspaceName = workspaceName, - mopDocker = 
mopDocker - } + + + output{ + Int deleted_sys_files = rmSysfiles.deleted_sys_files } - } - output { - Array[Int?] deleted_sys_files = rmSysfiles.deleted_sys_files - } + meta { + author: "Yueyao Gao" + email: "gaoyueya@broadinstitute.org" + description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." + } - meta { - author: "Yueyao Gao" - email: "gaoyueya@broadinstitute.org" - description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." } -} task rmSysfiles { input{ @@ -103,75 +86,12 @@ workflow TAG_Mop { } } - task GetFailedSubmissions { - input { - String namespace - String workspaceName - Boolean remove_partially_fail - String mopDocker - } - command <<< - source activate NeoVax-Input-Parser - python3 <>> - runtime { - docker: mopDocker - } - output { - Array[String] failed_submissions = read_lines("failed_submissions.txt") - String workspace_bucket = read_string("workspace_bucket.txt") - - } - } - -task CleanupAFolder { - input { - String bucket_name - String submission_id - } - - command <<< - timeout 23h gsutil -q rm -rf gs://~{bucket_name}/submissions/~{submission_id} || echo "Timed out. Please try again." 
- >>> - - runtime { - cpu: 1 - memory: "4 GiB" - disks: "local-disk 10 HDD" - preemptible_tries: 1 - max_retries: 1 - docker:"us.gcr.io/google.com/cloudsdktool/google-cloud-cli:alpine" - } -} - - task mop { input{ String namespace String workspaceName String mopDocker + Int sysfiles } command <<< source activate NeoVax-Input-Parser @@ -179,8 +99,11 @@ task CleanupAFolder { import firecloud.api as fapi import subprocess + print("System Files Deleted: ", ~{sysfiles}) + namespace = "~{namespace}" workspaceName = "~{workspaceName}" + print(f"Running Mop in {namespace}/{workspaceName}") # Run fissfc Mop to remove data that not presented in the data model subprocess.run(['fissfc', 'mop', '-w', workspaceName, '-p', namespace]) From 9cae31bf6e855fe856d66b477f0b70b56e8017a9 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 16:51:57 -0400 Subject: [PATCH 08/22] only delete sys files when there is any --- TAG_Mop/TAG_Mop.wdl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 1c0ec64..bb742fa 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -70,8 +70,11 @@ workflow TAG_Mop{ f.write(str(len(sys_files_to_delete))) print(f"System Files to Delete in {namespace}/{workspaceName}: ", len(sys_files_to_delete)) - for pattern in set([i.split('/')[-1] for i in sys_files_to_delete], desc="Deleting System Files", unit="pattern"): - subprocess.run(['gsutil', '-m', 'rm', f'gs://{bucket_name}/**/{pattern}']) + if len(sys_files_to_delete) == 0: + print("No system files to delete") + else: + for pattern in set([i.split('/')[-1] for i in sys_files_to_delete], desc="Deleting System Files", unit="pattern"): + subprocess.run(['gsutil', '-m', 'rm', f'gs://{bucket_name}/**/{pattern}']) CODE From 88328d631f9f7dc05fc7eb3f2a7a4c96b709d583 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 17:57:46 -0400 Subject: [PATCH 09/22] Only mop when there is files to mop --- TAG_Mop/TAG_Mop.wdl | 30 
+++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index bb742fa..71cf4a7 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -29,6 +29,7 @@ workflow TAG_Mop{ output{ Int deleted_sys_files = rmSysfiles.deleted_sys_files + Int mopped_files = mop.num_of_files_to_mop } meta { @@ -98,22 +99,21 @@ workflow TAG_Mop{ } command <<< source activate NeoVax-Input-Parser - python < mop_dry_run.txt + echo Files to mop:" $(cat mop_dry_run.txt | wc -l)" + cat mop_dry_run.txt | wc -l > num_of_files_to_mop.txt + + # Mop + if [ $(cat mop_dry_run.txt | wc -l) -eq 0 ]; then + echo "No files to mop" + else + fissfc mop -w ~{workspaceName} -p ~{namespace} + fi >>> + output{ + Int num_of_files_to_mop = read_int("num_of_files_to_mop.txt") + } runtime { docker: mopDocker memory: "32 GiB" From d03c094db748af76072c072e0de3b1bcdba24d1d Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 18:01:25 -0400 Subject: [PATCH 10/22] make the mop output optional --- TAG_Mop/TAG_Mop.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 71cf4a7..874ca6b 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -29,7 +29,7 @@ workflow TAG_Mop{ output{ Int deleted_sys_files = rmSysfiles.deleted_sys_files - Int mopped_files = mop.num_of_files_to_mop + Int? 
mopped_files = mop.num_of_files_to_mop } meta { @@ -99,6 +99,8 @@ workflow TAG_Mop{ } command <<< source activate NeoVax-Input-Parser + # The number of system files that were deleted + echo "System Files Deleted: ~{sysfiles}" # Dry run Mop fissfc mop -w ~{workspaceName} -p ~{namespace} --dry-run > mop_dry_run.txt echo Files to mop:" $(cat mop_dry_run.txt | wc -l)" From e418294dd17498d7ec9396f5eb2205648c1281b4 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 18:44:37 -0400 Subject: [PATCH 11/22] output the file that were mopped --- TAG_Mop/TAG_Mop.wdl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 874ca6b..f0be237 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -30,6 +30,7 @@ workflow TAG_Mop{ output{ Int deleted_sys_files = rmSysfiles.deleted_sys_files Int? mopped_files = mop.num_of_files_to_mop + File? mop_dry_run = mop.mopped_files } meta { @@ -115,6 +116,7 @@ workflow TAG_Mop{ >>> output{ Int num_of_files_to_mop = read_int("num_of_files_to_mop.txt") + File mopped_files = "mop_dry_run.txt" } runtime { docker: mopDocker From fa5818feab49de2dcad212560c6a28380f3fb1c0 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 18:47:02 -0400 Subject: [PATCH 12/22] update output file names --- TAG_Mop/TAG_Mop.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index f0be237..1364d06 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -29,8 +29,8 @@ workflow TAG_Mop{ output{ Int deleted_sys_files = rmSysfiles.deleted_sys_files - Int? mopped_files = mop.num_of_files_to_mop - File? mop_dry_run = mop.mopped_files + Int? num_mopped_files = mop.num_of_files_to_mop + File? 
mopped_files = mop.mopped_files } meta { From c3bc236491ea1e73e6a475ce79bf36891953831d Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 18:50:41 -0400 Subject: [PATCH 13/22] update meta info --- TAG_Mop/TAG_Mop.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 1364d06..54d2921 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -36,7 +36,7 @@ workflow TAG_Mop{ meta { author: "Yueyao Gao" email: "gaoyueya@broadinstitute.org" - description: "TAG Mop contains three sub-workflows: rmSysfiles, removeFailedSubmission, and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the Mop pipeline." + description: "TAG Mop contains two sub-workflows: rmSysfiles and mop. rmSysfiles removes system files that were generated from submissions from a Terra workspace. mop runs the FISS Mop function. It is recommended to run this workflow after Cleanup_Failed_Submissions.wdl." } } From 5fabad7706a089e551105352f86e591b39c7de79 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 19:41:24 -0400 Subject: [PATCH 14/22] print sys files --- TAG_Mop/TAG_Mop.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 54d2921..63e5cc5 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -71,11 +71,12 @@ workflow TAG_Mop{ with open('num_of_sys_files_to_delete.txt', 'w') as f: f.write(str(len(sys_files_to_delete))) print(f"System Files to Delete in {namespace}/{workspaceName}: ", len(sys_files_to_delete)) + print(sys_files_to_delete) if len(sys_files_to_delete) == 0: print("No system files to delete") else: - for pattern in set([i.split('/')[-1] for i in sys_files_to_delete], desc="Deleting System Files", unit="pattern"): + for pattern in set([i.split('/')[-1] for i in sys_files_to_delete]): subprocess.run(['gsutil', '-m', 'rm', f'gs://{bucket_name}/**/{pattern}']) CODE From 
03a9cc0e5586a9db432e77f210e850456403aec6 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 19:50:56 -0400 Subject: [PATCH 15/22] added list of sys files that will be deleted --- TAG_Mop/TAG_Mop.wdl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 63e5cc5..6c1b1d5 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -29,6 +29,7 @@ workflow TAG_Mop{ output{ Int deleted_sys_files = rmSysfiles.deleted_sys_files + File sys_files_to_delete = rmSysfiles.sys_files_to_delete Int? num_mopped_files = mop.num_of_files_to_mop File? mopped_files = mop.mopped_files } @@ -52,6 +53,7 @@ workflow TAG_Mop{ python <>> output{ Int deleted_sys_files = read_int("num_of_sys_files_to_delete.txt") + File sys_files_to_delete = "sys_files_to_delete.txt" } runtime { docker: mopDocker From 33474ec37a63b9c4f553ebd3aa0d067988925d35 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 20:38:54 -0400 Subject: [PATCH 16/22] updated output name --- TAG_Mop/TAG_Mop.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 6c1b1d5..655a747 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -28,8 +28,8 @@ workflow TAG_Mop{ output{ - Int deleted_sys_files = rmSysfiles.deleted_sys_files - File sys_files_to_delete = rmSysfiles.sys_files_to_delete + Int num_deleted_sys_files = rmSysfiles.deleted_sys_files + File deleted_sys_files = rmSysfiles.sys_files_to_delete Int? num_mopped_files = mop.num_of_files_to_mop File? 
mopped_files = mop.mopped_files } From c9bce71639ccb1eeea7881dac45f7078e17c016f Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 21:10:34 -0400 Subject: [PATCH 17/22] output the number of failed submission to clean --- CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl index 43b7f36..d6f326a 100644 --- a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl +++ b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl @@ -35,6 +35,10 @@ workflow Cleanup_Failed_Submissions { submission_id = sid } } + +output { + Int num_failed_submissions = length(GetWorkspaceInfo.failed_submissions) + } } task GetWorkspaceInfo { From 7db0e62600a6eeeaa16603d59a3d1b28d99b5acb Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 23:13:32 -0400 Subject: [PATCH 18/22] remove files even they are in the old Terra workspaces --- CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl index d6f326a..752d094 100644 --- a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl +++ b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl @@ -91,7 +91,14 @@ task CleanupAFolder { } command <<< - timeout 23h gsutil -q rm -rf gs://~{bucket_name}/submissions/~{submission_id} || echo "Timed out. Please try again." + # Older version of Terra does not have the submission folder + if gsutil -q ls gs://~{bucket_name}/submissions/~{submission_id} >/dev/null 2>&1; then + timeout 23h gsutil -q rm -rf gs://~{bucket_name}/submissions/~{submission_id} || echo "Timed out. Please try again." 
+ elif gsutil -q ls gs://~{bucket_name}/~{submission_id} >/dev/null 2>&1; then + timeout 23h gsutil -q rm -rf gs://~{bucket_name}/~{submission_id} || echo "Timed out. Please try again." + else + echo "File path does not exist." + fi >>> runtime { From caa8202a68c7e376529eee01e86a873fa56c2fa4 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 23:17:47 -0400 Subject: [PATCH 19/22] remove irrelevant input in json --- TAG_Mop/TAG_Mop.inputs.json | 2 -- 1 file changed, 2 deletions(-) diff --git a/TAG_Mop/TAG_Mop.inputs.json b/TAG_Mop/TAG_Mop.inputs.json index 80bb9ac..32943b1 100644 --- a/TAG_Mop/TAG_Mop.inputs.json +++ b/TAG_Mop/TAG_Mop.inputs.json @@ -1,9 +1,7 @@ { "TAG_Mop.runMop": "Boolean", "TAG_Mop.workspaceName": "String", - "TAG_Mop.removeFailedSubmissions": "Boolean", "TAG_Mop.namespace": "String (optional, default = \"broadtagteam\")", "TAG_Mop.mopDocker": "String (optional, default = \"us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0\")", - "TAG_Mop.remove_partially_fail": "Boolean (optional, default = false)" } From e759ff3989dd75b3a7e6945dcd5ada43a80d68eb Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 23:44:55 -0400 Subject: [PATCH 20/22] print out comment if this workspace has been cleaned up --- CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl index 752d094..25bc6c1 100644 --- a/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl +++ b/CleanupFailedSubmissions/Cleanup_Failed_Submissions.wdl @@ -97,7 +97,7 @@ task CleanupAFolder { elif gsutil -q ls gs://~{bucket_name}/~{submission_id} >/dev/null 2>&1; then timeout 23h gsutil -q rm -rf gs://~{bucket_name}/~{submission_id} || echo "Timed out. Please try again." else - echo "File path does not exist." + echo "Failed submission folder not found. This workspace has been cleaned up already." 
fi >>> From 1f755d8906c32618d6311c58167d3454b975e98b Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Sat, 13 Apr 2024 23:48:00 -0400 Subject: [PATCH 21/22] update input json again --- TAG_Mop/TAG_Mop.inputs.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TAG_Mop/TAG_Mop.inputs.json b/TAG_Mop/TAG_Mop.inputs.json index 32943b1..0c75a57 100644 --- a/TAG_Mop/TAG_Mop.inputs.json +++ b/TAG_Mop/TAG_Mop.inputs.json @@ -1,7 +1,7 @@ { - "TAG_Mop.runMop": "Boolean", + "TAG_Mop.mopDocker": "String (optional, default = \"us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0\")", "TAG_Mop.workspaceName": "String", "TAG_Mop.namespace": "String (optional, default = \"broadtagteam\")", - "TAG_Mop.mopDocker": "String (optional, default = \"us.gcr.io/tag-team-160914/neovax-parsley:2.2.1.0\")", + "TAG_Mop.runMop": "Boolean" } From 6381ad65d97bb2f39c446ea7d6f017593f231bf8 Mon Sep 17 00:00:00 2001 From: Yueyao Gao Date: Thu, 16 May 2024 14:21:17 -0400 Subject: [PATCH 22/22] updated based on PR comments --- TAG_Mop/TAG_Mop.wdl | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/TAG_Mop/TAG_Mop.wdl b/TAG_Mop/TAG_Mop.wdl index 655a747..d78c51c 100644 --- a/TAG_Mop/TAG_Mop.wdl +++ b/TAG_Mop/TAG_Mop.wdl @@ -47,6 +47,8 @@ workflow TAG_Mop{ String namespace String workspaceName String mopDocker + Int memory = 32 + Int cpu = 8 } command <<< source activate NeoVax-Input-Parser @@ -60,7 +62,7 @@ workflow TAG_Mop{ bucket_name = fapi.get_workspace(namespace, workspaceName).json()['workspace']['bucketName'] # Collect the system files to delete - storage_client = storage.Client() + storage_client = storage.Client(namespace) blobs = storage_client.list_blobs(bucket_name, projection='full') patterns_to_remove = ["stdout.log", "stderr.log", "localization.sh", "gcs_transfer.sh", "/stdout","/stderr","/rc","-rc.txt",'/memory_retry_rc','/output','/script','/exec.sh'] sys_files_to_delete = [] @@ -92,8 +94,8 @@ workflow TAG_Mop{ } runtime { docker: mopDocker 
- memory: "32 GiB" - cpu: 8 + memory: memory + " GiB" + cpu: cpu } } @@ -103,6 +105,8 @@ workflow TAG_Mop{ String workspaceName String mopDocker Int sysfiles + Int memory = 32 + Int cpu = 8 } command <<< source activate NeoVax-Input-Parser @@ -126,7 +130,7 @@ workflow TAG_Mop{ } runtime { docker: mopDocker - memory: "32 GiB" - cpu: 8 + memory: memory + " GiB" + cpu: cpu } }