From 22c569a9afbe02234ee5d3335b9331d74ef45802 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Fri, 21 Mar 2025 14:47:57 -0400 Subject: [PATCH 01/26] scaffold_and_refine_multitaxa: make e-mail address optional; determine by introspection if possible This changes `scaffold_and_refine_multitaxa` workflow so that rather than having `emailAddress` as a required input, the e-mail address of the active user is obtained by introspection of the execution environment iff running on Terra --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index b77a12874..b42dfb7bf 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -20,6 +20,7 @@ workflow scaffold_and_refine_multitaxa { File contigs_fasta File taxid_to_ref_accessions_tsv + String? email_address String? biosample_accession } @@ -27,6 +28,12 @@ workflow scaffold_and_refine_multitaxa { Int min_scaffold_unambig = 300 # in base-pairs; any scaffolded assembly < this length will not be refined/polished String sample_original_name = select_first([sample_name, sample_id]) + # get user email address, with the following precedence: + # 1. email_address provided via WDL input + # 2. user_email determined by introspection via check_terra_env task + # 3. (empty string fallback) + String? 
user_email_address = select_first([email_address,check_terra_env.user_email, ""]) + # download (multi-segment) genomes for each reference, fasta filename = colon-concatenated accession list scatter(taxon in read_tsv(taxid_to_ref_accessions_tsv)) { # taxon = [taxid, isolate_prefix, taxname, semicolon_delim_accession_list] @@ -38,7 +45,8 @@ workflow scaffold_and_refine_multitaxa { call ncbi.download_annotations { input: accessions = string_split.tokens, - combined_out_prefix = sub(taxon[3], ":", "-") # singularity does not like colons in filenames + combined_out_prefix = sub(taxon[3], ":", "-"), # singularity does not like colons in filenames + emailAddress = user_email_address } } From d5d28dc52c781775da14950093e61549fa0149ae Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 13:03:57 -0400 Subject: [PATCH 02/26] task `download_from_url`: only download http[s]; pass through non-http[s] input url to output (i.e. gs://, drs://, etc.) for direct consumption downstream This changes the task `download_from_url` to only download http[s] URLs; non-http[s] input urls will be passed through directly to the output for direct consumption downstream in tasks that can localize such protocols (i.e. gs://, drs://, etc.). The task does this simply by checking the URL prefix/protocol, but we would ideally decide to download based on introspection of the executor and its localization capabilities and configuration. After calling `download_from_url`, downstream tasks can then consume http[s] (or gs:// etc. paths) by selecting which output of `download_from_url` is defined: `select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url])` This was added with public http[s]-accessible databases in mind, such as the reference genome list from `broadinstitute/viral-references` used for `scaffold_and_refine_multitaxa`. This also adds a new workflow, `download_file`, to call the task separately from invocation in other workflows. 
--- .dockstore.yml | 3 + pipes/WDL/tasks/tasks_utils.wdl | 234 +++++++++++++++----------- pipes/WDL/workflows/download_file.wdl | 33 ++++ 3 files changed, 175 insertions(+), 95 deletions(-) create mode 100644 pipes/WDL/workflows/download_file.wdl diff --git a/.dockstore.yml b/.dockstore.yml index c0c16092a..4e49df442 100644 --- a/.dockstore.yml +++ b/.dockstore.yml @@ -109,6 +109,9 @@ workflows: - name: diff_genome_sets subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/diff_genome_sets.wdl + - name: download_file + subclass: WDL + primaryDescriptorPath: /pipes/WDL/workflows/download_file.wdl - name: downsample subclass: WDL primaryDescriptorPath: /pipes/WDL/workflows/downsample.wdl diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index c2c936ecb..758462818 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -367,8 +367,14 @@ task tar_extract { } task download_from_url { + # This task can be used prior to another task that consumes a file from a URL, + # but where the input "protocol" is not know in advance (http[s]:// vs. gs://,drs://,etc.) + # + # After calling download_from_url, downstream tasks can then simply say: + # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) + # This will allow the downstream task to consume the file since at that point it will be a bucket-local path, even if it was initially a web address. meta { - description: "Download a file from a URL. This task exists as a workaround until Terra supports this functionality natively (cromwell already does: https://cromwell.readthedocs.io/en/stable/filesystems/HTTP/). http[s] and ftp supported" + description: "Download a file from a URL if http[s], otherwise pass the URL through to the output. This task exists as a workaround until Terra supports http[s] inputs natively (cromwell already does: https://cromwell.readthedocs.io/en/stable/filesystems/HTTP/). 
http[s] and ftp supported" volatile: true } input { @@ -388,7 +394,7 @@ task download_from_url { parameter_meta { url_to_download: { - description: "The URL to download; this is passed to wget" + description: "The URL to download; this is passed to wget. If this is not an http[s] URL, the value is passed through unchanged to the 'passthrough_url' output." } output_filename: { @@ -415,107 +421,131 @@ } String download_subdir_local = "downloaded" + command <<< + echo "false" > WAS_HTTP_DOWNLOAD + # enforce that only one source of expected md5 hash can be provided ~{if defined(md5_hash_expected) && defined(md5_hash_expected_file_url) then 'echo "The inputs \'md5_hash_expected\' and \'md5_hash_expected_file_url\' cannot both be specified; please provide only one."; exit 1;' else ''} - mkdir -p "~{download_subdir_local}/tmp" - - pushd "~{download_subdir_local}" - - # ---- download desired file - pushd "tmp" + #touch FILE_LOCATION SIZE_OF_DOWNLOADED_FILE_BYTES MD5_SUM_OF_DOWNLOADED_FILE - # if a URL-encoded version of the requested download is needed - #encoded_url=$(python3 -c "import urllib.parse; print urllib.parse.quote('''~{url_to_download}''')") - - # get the desired file using wget - # --content-disposition = use the file name suggested by the server via the Content-Disposition header - # --trust-server-names = ...and in the event of a redirect, use the value of the final page rather than that of the original url - # --save-headers = save the headers sent by the HTTP server to the file, preceding the actual contents, with an empty line as the separator. 
- wget \ - --read-timeout 3 --waitretry 30 \ - --no-verbose \ - --method ~{request_method} \ - ~{if defined(output_filename) then "--output-document ~{output_filename}" else ""} \ - --tries ~{request_max_retries} \ - --content-disposition --trust-server-names ~{additional_wget_opts} \ - '~{url_to_download}' \ - ~{if save_response_header_to_file then "--save-headers" else ""} || (echo "ERROR: request to ~{request_method} file from URL failed: ~{url_to_download}"; exit 1) - - # ---- - - # get the name of the downloaded file - downloaded_file_name="$(basename "$(ls -1 | head -n1)")" - - if [ ! -f "$downloaded_file_name" ]; then - echo "Could not locate downloaded file \"$downloaded_file_name\"" - exit 1 - fi - - if [ ! -s "$downloaded_file_name" ]; then - echo "Downloaded file appears empty: \"$downloaded_file_name\"" - exit 1 - fi + # if this is an http[s] url, download the file + # (otherwise just pass through the URL to the 'passthrough_url' output) + if [[ ("~{url_to_download}" =~ ^(http|https):// ) ]]; then + mkdir -p "~{download_subdir_local}/tmp" + + pushd "~{download_subdir_local}" + + # ---- download desired file + pushd "tmp" - popd # return to downloaded/ + # resolve any redirects to the final URL + # wget will do this automatically, but this snippet may be helpful if we ever need to output the final URL from this task + # resolved_url="$(curl -Ls -o /dev/null -w '%{url_effective}' '~{url_to_download}')" + # echo "resolved_url: ${resolved_url}" - # (only for http(s)) split http response headers from response body - # since wget stores both in a single file separated by a couple newlines - if [[ "~{url_to_download}" =~ ^https?:// ]] && ~{if save_response_header_to_file then "true" else "false"}; then - echo "Saving response headers separately..." 
- csplit -f response -s "tmp/${downloaded_file_name}" $'/^\r$/+1' && \ - mv response00 "../${downloaded_file_name}.headers" && \ - mv response01 "${downloaded_file_name}" && \ - rm "tmp/$downloaded_file_name" - else - mv "tmp/${downloaded_file_name}" "${downloaded_file_name}" - fi - # alternative python implementation to split response headers from body - # via https://stackoverflow.com/a/75483099 - #python3 << CODE - #if ~{if save_response_header_to_file then "True" else "False"}: - # with open("tmp/${downloaded_file_name}", "rb") as f_downloaded: - # headers, body = f_downloaded.read().split(b"\r\n\r\n", 1) - # # write the response header to a file - # with open("${downloaded_file_name}.headers", "wb") as f_headers: - # f_headers.write(headers) - # f_headers.write(b"\r\n") - # # save the file body to its final location - # with open("${downloaded_file_name}", "wb") as f: - # f.write(body) - #else: - # ## if headers are not being saved, move the file to its final destination - # import shutil - # shutil.move("tmp/${downloaded_file_name}","${downloaded_file_name}") - #CODE - - rm -r "tmp" + # if a URL-encoded version of the requested download is needed + #encoded_url=$(python3 -c "import urllib.parse; print urllib.parse.quote('''~{url_to_download}''')") + + # get the desired file using wget + # --content-disposition = use the file name suggested by the server via the Content-Disposition header + # --trust-server-names = ...and in the event of a redirect, use the value of the final page rather than that of the original url + # --save-headers = save the headers sent by the HTTP server to the file, preceding the actual contents, with an empty line as the separator. 
+ wget \ + --read-timeout 3 --waitretry 30 \ + --no-verbose \ + --method ~{request_method} \ + ~{if defined(output_filename) then "--output-document ~{output_filename}" else ""} \ + --tries ~{request_max_retries} \ + --content-disposition --trust-server-names ~{additional_wget_opts} \ + '~{url_to_download}' \ + ~{if save_response_header_to_file then "--save-headers" else ""} || (echo "ERROR: request to ~{request_method} file from URL failed: ~{url_to_download}"; exit 1) + + # ---- + + # get the name of the downloaded file + downloaded_file_name="$(basename "$(ls -1 | head -n1)")" + + if [ ! -f "$downloaded_file_name" ]; then + echo "Could not locate downloaded file \"$downloaded_file_name\"" + exit 1 + fi + + if [ ! -s "$downloaded_file_name" ]; then + echo "Downloaded file appears empty: \"$downloaded_file_name\"" + exit 1 + fi - popd # return to job working directory + popd # return to downloaded/ - check_md5_sum() { - # $1 = md5sum expected - # $2 = md5sum of downloaded file - if [[ "$1" != "$2" ]]; then - echo "ERROR: md5sum of downloaded file ($2) did not match md5sum expected ($1)"; - exit 1 + # (only for http(s)) split http response headers from response body + # since wget stores both in a single file separated by a couple newlines + if [[ "~{url_to_download}" =~ ^https?:// ]] && ~{if save_response_header_to_file then "true" else "false"}; then + echo "Saving response headers separately..." 
+ csplit -f response -s "tmp/${downloaded_file_name}" $'/^\r$/+1' && \ + mv response00 "../${downloaded_file_name}.headers" && \ + mv response01 "${downloaded_file_name}" && \ + rm "tmp/$downloaded_file_name" + else + mv "tmp/${downloaded_file_name}" "${downloaded_file_name}" fi - } - md5sum_of_downloaded=$(md5sum --binary "~{download_subdir_local}/${downloaded_file_name}" | cut -f1 -d' ' | tee MD5_SUM_OF_DOWNLOADED_FILE) + # alternative python implementation to split response headers from body + # via https://stackoverflow.com/a/75483099 + #python3 << CODE + #if ~{if save_response_header_to_file then "True" else "False"}: + # with open("tmp/${downloaded_file_name}", "rb") as f_downloaded: + # headers, body = f_downloaded.read().split(b"\r\n\r\n", 1) + # # write the response header to a file + # with open("${downloaded_file_name}.headers", "wb") as f_headers: + # f_headers.write(headers) + # f_headers.write(b"\r\n") + # # save the file body to its final location + # with open("${downloaded_file_name}", "wb") as f: + # f.write(body) + #else: + # ## if headers are not being saved, move the file to its final destination + # import shutil + # shutil.move("tmp/${downloaded_file_name}","${downloaded_file_name}") + #CODE + + rm -r "tmp" - if ~{if defined(md5_hash_expected) then 'true' else 'false'}; then - md5_hash_expected="~{md5_hash_expected}" - check_md5_sum "$md5_hash_expected" "$md5sum_of_downloaded" - fi - if ~{if defined(md5_hash_expected_file_url) then 'true' else 'false'}; then - md5_hash_expected="$(curl --silent ~{md5_hash_expected_file_url} | cut -f1 -d' ')" - check_md5_sum "$md5_hash_expected" "$md5sum_of_downloaded" - fi + popd # return to job working directory + + check_md5_sum() { + # $1 = md5sum expected + # $2 = md5sum of downloaded file + if [[ "$1" != "$2" ]]; then + echo "ERROR: md5sum of downloaded file ($2) did not match md5sum expected ($1)"; + exit 1 + fi + } + + md5sum_of_downloaded=$(md5sum --binary 
"~{download_subdir_local}/${downloaded_file_name}" | cut -f1 -d' ' | tee MD5_SUM_OF_DOWNLOADED_FILE) - # report the file size, in bytes - printf "Downloaded file size (bytes): " && stat --format=%s "~{download_subdir_local}/${downloaded_file_name}" | tee SIZE_OF_DOWNLOADED_FILE_BYTES + if ~{if defined(md5_hash_expected) then 'true' else 'false'}; then + md5_hash_expected="~{md5_hash_expected}" + check_md5_sum "$md5_hash_expected" "$md5sum_of_downloaded" + fi + if ~{if defined(md5_hash_expected_file_url) then 'true' else 'false'}; then + md5_hash_expected="$(curl --silent ~{md5_hash_expected_file_url} | cut -f1 -d' ')" + check_md5_sum "$md5_hash_expected" "$md5sum_of_downloaded" + fi + + # report the file size, in bytes + printf "Downloaded file size (bytes): " && stat --format=%s "~{download_subdir_local}/${downloaded_file_name}" | tee SIZE_OF_DOWNLOADED_FILE_BYTES + touch FILE_LOCATION + echo "true" > WAS_HTTP_DOWNLOAD + echo $(realpath "~{download_subdir_local}/${downloaded_file_name}") > FILE_LOCATION + else + echo "Only URLs beginning with 'http://' or 'https://' can be downloaded; passing through input url to directly to output..." + echo "~{url_to_download}" > FILE_LOCATION + printf "0" > SIZE_OF_DOWNLOADED_FILE_BYTES + printf "" > MD5_SUM_OF_DOWNLOADED_FILE + echo "false" > WAS_HTTP_DOWNLOAD + fi >>> runtime { docker: "quay.io/broadinstitute/viral-baseimage:0.2.4" @@ -527,12 +557,26 @@ task download_from_url { maxRetries: 0 preemptible: 1 } - output { - File downloaded_response_file = glob("downloaded/*")[0] - File? downloaded_response_headers = basename(downloaded_response_file) + ".headers" - Int file_size_bytes = read_int("SIZE_OF_DOWNLOADED_FILE_BYTES") - String md5_sum_of_response_file = read_string("MD5_SUM_OF_DOWNLOADED_FILE") + # placeholders to output null until WDL supports null literals + Int? nullIntPlaceholder + String? 
nullStrPlaceholder + + # output files + output { + # one or the other will be returned, depending on the download method + # an http[s] url will be downloaded to a file and available via downloaded_response_file + # other urls (i.e. localizable paths like 'gs://*') will be available via passthrough_url + # When consuming this task, select the relevant output via: + # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder + String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download + + File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder + String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder + Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder + + Boolean passed_through_input_url_instead_of_downloading = if ( defined(downloaded_response_file) ) then false else true File stdout = stdout() File stderr = stderr() diff --git a/pipes/WDL/workflows/download_file.wdl b/pipes/WDL/workflows/download_file.wdl new file mode 100644 index 000000000..e3239eedd --- /dev/null +++ b/pipes/WDL/workflows/download_file.wdl @@ -0,0 +1,33 @@ +version 1.0 + +#DX_SKIP_WORKFLOW + +import "../tasks/tasks_utils.wdl" as terra + +workflow download_file { + meta { + description: "Downloads an http[s] file. Helpful if this is not natively supported by the WDL execution backend for File inputs." 
+ author: "Broad Viral Genomics" + email: "viral-ngs@broadinstitute.org" + } + + call terra.download_from_url + + output { + File output_file = select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) + + # one or the other will be returned, depending on the download method + # an http[s] url will be downloaded to a file and available via downloaded_response_file + # other urls (i.e. localizable paths like 'gs://*', 'drs://') will be available via passthrough_url + File? downloaded_response_file = download_from_url.downloaded_response_file + String? passthrough_url = download_from_url.passthrough_url + + # optional fields only returned in the case of a downloaded file + File? downloaded_response_headers = download_from_url.downloaded_response_headers + String? md5_sum_of_response_file = download_from_url.md5_sum_of_response_file + Int? file_size_bytes = download_from_url.file_size_bytes + + # boolean flag to indicate if the download task passed through the input url instead of downloading the file + Boolean passed_through_input_url_instead_of_downloading = download_from_url.passed_through_input_url_instead_of_downloading + } +} From b892b3258feba2f0cd97f907e3f121dcc24f6b5e Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 13:07:16 -0400 Subject: [PATCH 03/26] viral-core 2.4.1->2.4.2 --- pipes/WDL/tasks/tasks_assembly.wdl | 4 ++-- pipes/WDL/tasks/tasks_demux.wdl | 4 ++-- pipes/WDL/tasks/tasks_interhost.wdl | 2 +- pipes/WDL/tasks/tasks_ncbi.wdl | 10 +++++----- pipes/WDL/tasks/tasks_nextstrain.wdl | 4 ++-- pipes/WDL/tasks/tasks_read_utils.wdl | 14 +++++++------- pipes/WDL/tasks/tasks_reports.wdl | 12 ++++++------ pipes/WDL/tasks/tasks_taxon_filter.wdl | 2 +- pipes/WDL/tasks/tasks_terra.wdl | 4 ++-- pipes/WDL/tasks/tasks_utils.wdl | 10 +++++----- requirements-modules.txt | 2 +- 11 files changed, 34 insertions(+), 34 deletions(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl 
b/pipes/WDL/tasks/tasks_assembly.wdl index 0df79fa70..d54a188f4 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -583,7 +583,7 @@ task align_reads { Boolean skip_mark_dupes = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" String sample_name = basename(basename(basename(reads_unmapped_bam, ".bam"), ".taxfilt"), ".clean") } @@ -849,7 +849,7 @@ task run_discordance { String out_basename = "run" Int min_coverage = 4 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } parameter_meta { reads_aligned_bam: { diff --git a/pipes/WDL/tasks/tasks_demux.wdl b/pipes/WDL/tasks/tasks_demux.wdl index 2d0bbe3b4..843f2a0ac 100644 --- a/pipes/WDL/tasks/tasks_demux.wdl +++ b/pipes/WDL/tasks/tasks_demux.wdl @@ -6,7 +6,7 @@ task merge_tarballs { String out_filename Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 2625 @@ -163,7 +163,7 @@ task illumina_demux { Int? machine_mem_gb Int disk_size = 2625 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_interhost.wdl b/pipes/WDL/tasks/tasks_interhost.wdl index 043399375..a9d58f5ec 100644 --- a/pipes/WDL/tasks/tasks_interhost.wdl +++ b/pipes/WDL/tasks/tasks_interhost.wdl @@ -351,7 +351,7 @@ task index_ref { File? novocraft_license Int? 
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_ncbi.wdl b/pipes/WDL/tasks/tasks_ncbi.wdl index aa50bc84e..83d29aab3 100644 --- a/pipes/WDL/tasks/tasks_ncbi.wdl +++ b/pipes/WDL/tasks/tasks_ncbi.wdl @@ -79,7 +79,7 @@ task sequencing_platform_from_bam { input { File bam - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } command <<< @@ -188,7 +188,7 @@ task structured_comments { File? filter_to_ids - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } String out_base = basename(assembly_stats_tsv, '.txt') command <<< @@ -241,7 +241,7 @@ task structured_comments_from_aligned_bam { String out_basename = basename(aligned_bam, '.bam') Boolean is_genome_assembly = true Boolean sanitize_ids = true - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } # see https://www.ncbi.nlm.nih.gov/genbank/structuredcomment/ command <<< @@ -360,7 +360,7 @@ task rename_fasta_header { String out_basename = basename(genome_fasta, ".fasta") - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } command { set -e @@ -525,7 +525,7 @@ task sra_meta_prep { Boolean paired String out_name = "sra_metadata.tsv" - String docker="quay.io/broadinstitute/viral-core:2.4.1" + String docker="quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 100 parameter_meta { diff --git a/pipes/WDL/tasks/tasks_nextstrain.wdl b/pipes/WDL/tasks/tasks_nextstrain.wdl index af2cbaef0..5f27babf8 100644 --- a/pipes/WDL/tasks/tasks_nextstrain.wdl +++ b/pipes/WDL/tasks/tasks_nextstrain.wdl @@ -332,7 +332,7 @@ task derived_cols { String? 
lab_highlight_loc Array[File] table_map = [] - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" Int disk_size = 50 } parameter_meta { @@ -900,7 +900,7 @@ task filter_sequences_to_list { String out_fname = sub(sub(basename(sequences, ".zst"), ".vcf", ".filtered.vcf"), ".fasta$", ".filtered.fasta") # Prior docker image: "nextstrain/base:build-20240318T173028Z" - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" Int disk_size = 750 } parameter_meta { diff --git a/pipes/WDL/tasks/tasks_read_utils.wdl b/pipes/WDL/tasks/tasks_read_utils.wdl index b077eb572..e2603b5a6 100644 --- a/pipes/WDL/tasks/tasks_read_utils.wdl +++ b/pipes/WDL/tasks/tasks_read_utils.wdl @@ -84,7 +84,7 @@ task group_bams_by_sample { task get_bam_samplename { input { File bam - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = round(size(bam, "GB")) + 50 command <<< @@ -111,7 +111,7 @@ task get_sample_meta { input { Array[File] samplesheets_extended - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 50 command <<< @@ -172,7 +172,7 @@ task merge_and_reheader_bams { File? reheader_table String out_basename = basename(in_bams[0], ".bam") - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" Int disk_size = 750 Int machine_mem_gb = 4 } @@ -244,7 +244,7 @@ task rmdup_ubam { String method = "mvicuna" Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 375 + 2 * ceil(size(reads_unmapped_bam, "GB")) @@ -303,7 +303,7 @@ task downsample_bams { Boolean deduplicateAfter = false Int? 
machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 750 @@ -370,7 +370,7 @@ task FastqToUBAM { Int cpus = 2 Int mem_gb = 4 Int disk_size = 750 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } parameter_meta { fastq_1: { description: "Unaligned read1 file in fastq format", patterns: ["*.fastq", "*.fastq.gz", "*.fq", "*.fq.gz"] } @@ -424,7 +424,7 @@ task read_depths { File aligned_bam String out_basename = basename(aligned_bam, '.bam') - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 200 command <<< diff --git a/pipes/WDL/tasks/tasks_reports.wdl b/pipes/WDL/tasks/tasks_reports.wdl index 66de02e11..5441faac0 100644 --- a/pipes/WDL/tasks/tasks_reports.wdl +++ b/pipes/WDL/tasks/tasks_reports.wdl @@ -15,7 +15,7 @@ task alignment_metrics { Int max_amplicons=500 Int machine_mem_gb=32 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } String out_basename = basename(aligned_bam, ".bam") @@ -142,7 +142,7 @@ task plot_coverage { String? plotXLimits # of the form "min max" (ints, space between) String? plotYLimits # of the form "min max" (ints, space between) - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 375 @@ -289,7 +289,7 @@ task coverage_report { Array[File] mapped_bam_idx = [] # optional.. 
speeds it up if you provide it, otherwise we auto-index String out_report_name = "coverage_report.txt" - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 375 @@ -364,7 +364,7 @@ task fastqc { input { File reads_bam - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } parameter_meta { reads_bam:{ @@ -412,7 +412,7 @@ task align_and_count { Boolean keep_duplicates_when_filtering = false Int? machine_mem_gb - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } String reads_basename=basename(reads_bam, ".bam") @@ -535,7 +535,7 @@ task align_and_count_summary { String output_prefix = "count_summary" - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 100 diff --git a/pipes/WDL/tasks/tasks_taxon_filter.wdl b/pipes/WDL/tasks/tasks_taxon_filter.wdl index 5b44654ab..fd47ff3a2 100644 --- a/pipes/WDL/tasks/tasks_taxon_filter.wdl +++ b/pipes/WDL/tasks/tasks_taxon_filter.wdl @@ -211,7 +211,7 @@ task merge_one_per_sample { Boolean rmdup = false Int machine_mem_gb = 7 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 750 diff --git a/pipes/WDL/tasks/tasks_terra.wdl b/pipes/WDL/tasks/tasks_terra.wdl index b7cba9e18..820a3a2c4 100644 --- a/pipes/WDL/tasks/tasks_terra.wdl +++ b/pipes/WDL/tasks/tasks_terra.wdl @@ -33,7 +33,7 @@ task gcs_copy { task check_terra_env { input { - String docker = "quay.io/broadinstitute/viral-baseimage:0.2.4" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } meta { description: "task for inspection of backend to determine whether the task is running on Terra and/or GCP" @@ -439,7 +439,7 @@ task create_or_update_sample_tables { String sample_table_name = 
"sample" String library_table_name = "library" - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } meta { diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 758462818..e631cffd3 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -52,7 +52,7 @@ task unpack_archive_to_bucket_path { # execution and resource requirements Int disk_size = 500 Int machine_mem_gb = 128 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } parameter_meta { @@ -293,7 +293,7 @@ task zcat { { if [ -f /sys/fs/cgroup/memory.peak ]; then cat /sys/fs/cgroup/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.peak ]; then cat /sys/fs/cgroup/memory/memory.peak; elif [ -f /sys/fs/cgroup/memory/memory.max_usage_in_bytes ]; then cat /sys/fs/cgroup/memory/memory.max_usage_in_bytes; else echo "0"; fi } > MEM_BYTES >>> runtime { - docker: "quay.io/broadinstitute/viral-core:2.4.1" + docker: "quay.io/broadinstitute/viral-core:2.4.2" memory: "1 GB" cpu: cpus disks: "local-disk " + disk_size + " LOCAL" @@ -901,7 +901,7 @@ task tsv_join { runtime { memory: "~{machine_mem_gb} GB" cpu: 4 - docker: "quay.io/broadinstitute/viral-core:2.4.1" + docker: "quay.io/broadinstitute/viral-core:2.4.2" disks: "local-disk " + disk_size + " HDD" disk: disk_size + " GB" # TES dx_instance_type: "mem1_ssd1_v2_x4" @@ -988,7 +988,7 @@ task tsv_stack { input { Array[File]+ input_tsvs String out_basename - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" } Int disk_size = 50 @@ -1259,7 +1259,7 @@ task filter_sequences_by_length { File sequences_fasta Int min_non_N = 1 - String docker = "quay.io/broadinstitute/viral-core:2.4.1" + String docker = "quay.io/broadinstitute/viral-core:2.4.2" Int disk_size = 750 } parameter_meta { diff --git a/requirements-modules.txt 
b/requirements-modules.txt index da9a876c7..1dcbe964a 100644 --- a/requirements-modules.txt +++ b/requirements-modules.txt @@ -1,5 +1,5 @@ broadinstitute/viral-baseimage=0.2.4 -broadinstitute/viral-core=2.4.1 +broadinstitute/viral-core=2.4.2 broadinstitute/viral-assemble=2.4.1.0 broadinstitute/viral-classify=2.2.5 broadinstitute/viral-phylo=2.4.1.0 From f3e35e1365aaa62a32ea545738d6bcfabcedec8a Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 13:22:12 -0400 Subject: [PATCH 04/26] `scaffold_and_refine_multitaxa`workflow: use the `download_file` task to allow the workflow to consume `taxid_to_ref_accessions_tsv` input specified from either a `gs://` or `http[s]` source `scaffold_and_refine_multitaxa`workflow: use the `download_file` task to allow the workflow to consume its `taxid_to_ref_accessions_tsv` input from a path specified using `gs://` *or* `http[s]`. --- pipes/WDL/workflows/download_file.wdl | 11 +++++++++-- .../WDL/workflows/scaffold_and_refine_multitaxa.wdl | 12 +++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pipes/WDL/workflows/download_file.wdl b/pipes/WDL/workflows/download_file.wdl index e3239eedd..32c98a45b 100644 --- a/pipes/WDL/workflows/download_file.wdl +++ b/pipes/WDL/workflows/download_file.wdl @@ -11,10 +11,17 @@ workflow download_file { email: "viral-ngs@broadinstitute.org" } - call terra.download_from_url + input { + String path_utl + } + + call terra.download_from_url { + input: + url_to_download = path_utl + } output { - File output_file = select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) + File file_path = select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) # one or the other will be returned, depending on the download method # an http[s] url will be downloaded to a file and available via downloaded_response_file diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl 
b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index b42dfb7bf..09808234b 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -4,6 +4,7 @@ import "../tasks/tasks_assembly.wdl" as assembly import "../tasks/tasks_ncbi.wdl" as ncbi import "../tasks/tasks_utils.wdl" as utils import "assemble_refbased.wdl" as assemble_refbased +import "download_file.wdl" as download_file workflow scaffold_and_refine_multitaxa { meta { @@ -19,7 +20,7 @@ workflow scaffold_and_refine_multitaxa { File reads_unmapped_bam File contigs_fasta - File taxid_to_ref_accessions_tsv + String taxid_to_ref_accessions_tsv String? email_address String? biosample_accession @@ -34,8 +35,13 @@ workflow scaffold_and_refine_multitaxa { # 3. (empty string fallback) String? user_email_address = select_first([email_address,check_terra_env.user_email, ""]) + call download_file.download_file as dl_taxid_to_ref_tsv { + input: + url = taxid_to_ref_accessions_tsv + } + # download (multi-segment) genomes for each reference, fasta filename = colon-concatenated accession list - scatter(taxon in read_tsv(taxid_to_ref_accessions_tsv)) { + scatter(taxon in read_tsv(dl_taxid_to_ref_tsv.file_path)) { # taxon = [taxid, isolate_prefix, taxname, semicolon_delim_accession_list] call utils.string_split { input: @@ -90,7 +96,7 @@ workflow scaffold_and_refine_multitaxa { # get taxid and taxname from taxid_to_ref_accessions_tsv call utils.fetch_row_from_tsv as tax_lookup { input: - tsv = taxid_to_ref_accessions_tsv, + tsv = dl_taxid_to_ref_tsv.file_path, idx_col = "accessions", idx_val = sub(scaffold.scaffolding_chosen_ref_basename, "-", ":"), add_header = ["taxid", "isolate_prefix", "taxname", "accessions"] From 7be105c789dc29c92bc811993241027580d9aee3 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 14:24:27 -0400 Subject: [PATCH 05/26] call terra.check_terra_env --- 
pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index 09808234b..f72e39707 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -33,7 +33,8 @@ workflow scaffold_and_refine_multitaxa { # 1. email_address provided via WDL input # 2. user_email determined by introspection via check_terra_env task # 3. (empty string fallback) - String? user_email_address = select_first([email_address,check_terra_env.user_email, ""]) + call terra.check_terra_env + String user_email_address = select_first([email_address,check_terra_env.user_email, ""]) call download_file.download_file as dl_taxid_to_ref_tsv { input: From 482ceb406ab6804df2bc4bb181d4697ad38f27cc Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 14:28:34 -0400 Subject: [PATCH 06/26] move null placeholders to input block of download_from_url to satisfy womtool checks --- pipes/WDL/tasks/tasks_utils.wdl | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index e631cffd3..9e307c1c3 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -390,6 +390,10 @@ task download_from_url { Boolean save_response_header_to_file = false Int disk_size = 50 + + # Do not use these inputs; they are placeholders to output null until WDL supports null literals + Int? _nullIntPlaceholder + String? _nullStrPlaceholder } parameter_meta { @@ -558,10 +562,6 @@ task download_from_url { preemptible: 1 } - # placeholders to output null until WDL supports null literals - Int? nullIntPlaceholder - String? 
nullStrPlaceholder - # output files output { # one or the other will be returned, depending on the download method @@ -569,12 +569,12 @@ task download_from_url { # other urls (i.e. localizable paths like 'gs://*') will be available via passthrough_url # When consuming this task, select the relevant output via: # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) - File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else _nullStrPlaceholder + String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then _nullStrPlaceholder else url_to_download - File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder - String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder - Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else _nullStrPlaceholder + String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else _nullStrPlaceholder + Int? 
file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else _nullIntPlaceholder Boolean passed_through_input_url_instead_of_downloading = if ( defined(downloaded_response_file) ) then false else true From 1250ed474b31a40c9bc18612b105a4edf53048dd Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 14:31:35 -0400 Subject: [PATCH 07/26] WDL (1.0) doesn't allow leading underscores in variable names WDL doesn't allow leading underscores in variable names: https://github.com/openwdl/wdl/blob/legacy/versions/1.0/SPEC.md#whitespace-strings-identifiers-constants --- pipes/WDL/tasks/tasks_utils.wdl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 9e307c1c3..40b23af73 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -392,8 +392,8 @@ task download_from_url { Int disk_size = 50 # Do not use these inputs; they are placeholders to output null until WDL supports null literals - Int? _nullIntPlaceholder - String? _nullStrPlaceholder + Int? nullIntPlaceholder + String? nullStrPlaceholder } parameter_meta { @@ -569,12 +569,12 @@ task download_from_url { # other urls (i.e. localizable paths like 'gs://*') will be available via passthrough_url # When consuming this task, select the relevant output via: # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) - File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else _nullStrPlaceholder - String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then _nullStrPlaceholder else url_to_download + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder + String? 
passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download - File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else _nullStrPlaceholder - String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else _nullStrPlaceholder - Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else _nullIntPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder + String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder + Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder Boolean passed_through_input_url_instead_of_downloading = if ( defined(downloaded_response_file) ) then false else true From 75414c8fd89c93c9daf0b6b227f0ce936c566f33 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 14:36:02 -0400 Subject: [PATCH 08/26] import "../tasks/tasks_terra.wdl" as terra --- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index f72e39707..e4c8bb467 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -3,6 +3,7 @@ version 1.0 import "../tasks/tasks_assembly.wdl" as assembly import "../tasks/tasks_ncbi.wdl" as ncbi import "../tasks/tasks_utils.wdl" as utils +import "../tasks/tasks_terra.wdl" as terra import "assemble_refbased.wdl" as assemble_refbased import "download_file.wdl" as 
download_file @@ -29,11 +30,12 @@ workflow scaffold_and_refine_multitaxa { Int min_scaffold_unambig = 300 # in base-pairs; any scaffolded assembly < this length will not be refined/polished String sample_original_name = select_first([sample_name, sample_id]) + call terra.check_terra_env + # get user email address, with the following precedence: # 1. email_address provided via WDL input # 2. user_email determined by introspection via check_terra_env task # 3. (empty string fallback) - call terra.check_terra_env String user_email_address = select_first([email_address,check_terra_env.user_email, ""]) call download_file.download_file as dl_taxid_to_ref_tsv { From ae324e46a12e773f32f13ae2ade50d69138cbf12 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 14:42:54 -0400 Subject: [PATCH 09/26] fix typo in path_url --- pipes/WDL/workflows/download_file.wdl | 4 ++-- pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/workflows/download_file.wdl b/pipes/WDL/workflows/download_file.wdl index 32c98a45b..abae32780 100644 --- a/pipes/WDL/workflows/download_file.wdl +++ b/pipes/WDL/workflows/download_file.wdl @@ -12,12 +12,12 @@ workflow download_file { } input { - String path_utl + String path_url } call terra.download_from_url { input: - url_to_download = path_utl + url_to_download = path_url } output { diff --git a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl index e4c8bb467..a0e47e414 100644 --- a/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl +++ b/pipes/WDL/workflows/scaffold_and_refine_multitaxa.wdl @@ -40,7 +40,7 @@ workflow scaffold_and_refine_multitaxa { call download_file.download_file as dl_taxid_to_ref_tsv { input: - url = taxid_to_ref_accessions_tsv + path_url = taxid_to_ref_accessions_tsv } # download (multi-segment) genomes for each reference, fasta filename = colon-concatenated 
accession list From 4f5ff5279e314846fd5e1ca44c0545d1dc6f299a Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 15:34:11 -0400 Subject: [PATCH 10/26] debugging download_from_url delocalization on Terra debugging download_from_url delocalization on Terra --- pipes/WDL/tasks/tasks_utils.wdl | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 40b23af73..dcfb9a0b4 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -518,6 +518,9 @@ task download_from_url { popd # return to job working directory + echo "ls -lah $(pwd)" + ls -lah + check_md5_sum() { # $1 = md5sum expected # $2 = md5sum of downloaded file @@ -542,10 +545,16 @@ task download_from_url { printf "Downloaded file size (bytes): " && stat --format=%s "~{download_subdir_local}/${downloaded_file_name}" | tee SIZE_OF_DOWNLOADED_FILE_BYTES touch FILE_LOCATION echo "true" > WAS_HTTP_DOWNLOAD - echo $(realpath "~{download_subdir_local}/${downloaded_file_name}") > FILE_LOCATION + downloaded_file_realpath=$(realpath "~{download_subdir_local}/${downloaded_file_name}") + + echo '~{download_subdir_local}/${downloaded_file_name}: '"~{download_subdir_local}/${downloaded_file_name}" + echo '${downloaded_file_realpath}: '"${downloaded_file_realpath}" + + echo "${downloaded_file_realpath}" | tee FILE_LOCATION else echo "Only URLs beginning with 'http://' or 'https://' can be downloaded; passing through input url to directly to output..." - echo "~{url_to_download}" > FILE_LOCATION + #echo "~{url_to_download}" > FILE_LOCATION + echo "" > FILE_LOCATION printf "0" > SIZE_OF_DOWNLOADED_FILE_BYTES printf "" > MD5_SUM_OF_DOWNLOADED_FILE echo "false" > WAS_HTTP_DOWNLOAD @@ -569,6 +578,7 @@ task download_from_url { # other urls (i.e. 
localizable paths like 'gs://*') will be available via passthrough_url # When consuming this task, select the relevant output via: # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) + File? downloaded_response_file_debug = read_string("FILE_LOCATION") File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download From ccb1f0c13b272bd19347fc9c2b1d54166bbf2a4c Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 15:35:16 -0400 Subject: [PATCH 11/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index dcfb9a0b4..a117fbca1 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -521,6 +521,9 @@ task download_from_url { echo "ls -lah $(pwd)" ls -lah + echo "ls -lah $(pwd)/~{download_subdir_local}" + ls -lah ~{download_subdir_local} + check_md5_sum() { # $1 = md5sum expected # $2 = md5sum of downloaded file From 524e9e0f05646464ed73ebc5ed4da7b535556ac6 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 15:49:29 -0400 Subject: [PATCH 12/26] debug delocalization: try relative rather than absolute path --- pipes/WDL/tasks/tasks_utils.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index a117fbca1..65dbfb2c5 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -553,7 +553,8 @@ task download_from_url { echo '~{download_subdir_local}/${downloaded_file_name}: '"~{download_subdir_local}/${downloaded_file_name}" echo '${downloaded_file_realpath}: '"${downloaded_file_realpath}" - echo "${downloaded_file_realpath}" | tee FILE_LOCATION + #echo 
"${downloaded_file_realpath}" | tee FILE_LOCATION + echo "~{download_subdir_local}/${downloaded_file_name}" | tee FILE_LOCATION else echo "Only URLs beginning with 'http://' or 'https://' can be downloaded; passing through input url to directly to output..." #echo "~{url_to_download}" > FILE_LOCATION From f84f0e46d7c2ce5bb3542c24e52fb49b19ce0f7a Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 15:53:09 -0400 Subject: [PATCH 13/26] debug download_from_url --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 65dbfb2c5..58212cc99 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -586,7 +586,7 @@ task download_from_url { File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download - File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then read_string("FILE_LOCATION") + ".headers" else nullStrPlaceholder String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder Int? 
file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder From e3b4b01f7120fa56aa74269a781524b8d4f64f5e Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Tue, 25 Mar 2025 15:57:59 -0400 Subject: [PATCH 14/26] debug continued --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 58212cc99..65dbfb2c5 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -586,7 +586,7 @@ task download_from_url { File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download - File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then read_string("FILE_LOCATION") + ".headers" else nullStrPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder Int? 
file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder From 369b0ba2cb05fbf5eb1835e64e7eaa8909e76dbd Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:02:30 -0400 Subject: [PATCH 15/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 65dbfb2c5..33a6cb5e1 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -518,12 +518,6 @@ task download_from_url { popd # return to job working directory - echo "ls -lah $(pwd)" - ls -lah - - echo "ls -lah $(pwd)/~{download_subdir_local}" - ls -lah ~{download_subdir_local} - check_md5_sum() { # $1 = md5sum expected # $2 = md5sum of downloaded file @@ -555,6 +549,12 @@ task download_from_url { #echo "${downloaded_file_realpath}" | tee FILE_LOCATION echo "~{download_subdir_local}/${downloaded_file_name}" | tee FILE_LOCATION + + echo "ls -lah $(pwd)" + ls -lah + + echo "ls -lah $(pwd)/~{download_subdir_local}" + ls -lah ~{download_subdir_local} else echo "Only URLs beginning with 'http://' or 'https://' can be downloaded; passing through input url to directly to output..." #echo "~{url_to_download}" > FILE_LOCATION @@ -582,13 +582,14 @@ task download_from_url { # other urls (i.e. localizable paths like 'gs://*') will be available via passthrough_url # When consuming this task, select the relevant output via: # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) - File? downloaded_response_file_debug = read_string("FILE_LOCATION") - File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder + #File? downloaded_response_file_debug = read_string("FILE_LOCATION") + #File? 
downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder + File? downloaded_response_file = read_string("FILE_LOCATION") String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download - File? downloaded_response_headers = if ( defined(downloaded_response_file) ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) and save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder - Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder + Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(read_string("FILE_LOCATION"))) else nullIntPlaceholder Boolean passed_through_input_url_instead_of_downloading = if ( defined(downloaded_response_file) ) then false else true From f346b0192a1274af4f160ebaf3cc5d4de5ecd00b Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:03:29 -0400 Subject: [PATCH 16/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 33a6cb5e1..c4d906671 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -587,7 +587,7 @@ task download_from_url { File? downloaded_response_file = read_string("FILE_LOCATION") String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download - File? 
downloaded_response_headers = if ( defined(downloaded_response_file) and save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder + File? downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(read_string("FILE_LOCATION"))) else nullIntPlaceholder From 7dffa3705b7b5a68b0473e6b35244f559e9b8edc Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:14:51 -0400 Subject: [PATCH 17/26] touch FILE_LOCATION at start of task in attempt to resolve "Failed to predict files needed to de-localize from 'read_string'" error occurring *before* task execution --- pipes/WDL/tasks/tasks_utils.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index c4d906671..c3b50837e 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -432,7 +432,8 @@ task download_from_url { # enforce that only one source of expected md5 hash can be provided ~{if defined(md5_hash_expected) && defined(md5_hash_expected_file_url) then 'echo "The inputs \'md5_hash_expected\' and \'md5_hash_expected_file_url\' cannot both be specified; please provide only one."; exit 1;' else ''} - #touch FILE_LOCATION SIZE_OF_DOWNLOADED_FILE_BYTES MD5_SUM_OF_DOWNLOADED_FILE + #touch SIZE_OF_DOWNLOADED_FILE_BYTES MD5_SUM_OF_DOWNLOADED_FILE + touch FILE_LOCATION # if this is an http[s] url, download the file # (otherwise just pass through the URL to the 'path_str' output) @@ -540,7 +541,6 @@ task download_from_url { # report the file size, in bytes printf "Downloaded file size 
(bytes): " && stat --format=%s "~{download_subdir_local}/${downloaded_file_name}" | tee SIZE_OF_DOWNLOADED_FILE_BYTES - touch FILE_LOCATION echo "true" > WAS_HTTP_DOWNLOAD downloaded_file_realpath=$(realpath "~{download_subdir_local}/${downloaded_file_name}") From 5c5103f915cc3e20985f2d60abfe9005187d768f Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:21:57 -0400 Subject: [PATCH 18/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index c3b50837e..5874b4ab4 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -589,7 +589,7 @@ task download_from_url { File? downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder String? md5_sum_of_response_file = if ( defined(downloaded_response_file) ) then read_string("MD5_SUM_OF_DOWNLOADED_FILE") else nullStrPlaceholder - Int? file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(read_string("FILE_LOCATION"))) else nullIntPlaceholder + Int? 
file_size_bytes = if ( defined(downloaded_response_file) ) then floor(size(downloaded_response_file)) else nullIntPlaceholder Boolean passed_through_input_url_instead_of_downloading = if ( defined(downloaded_response_file) ) then false else true From db03042aff243d7f34e0da24fae61a279332bd62 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:28:02 -0400 Subject: [PATCH 19/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 5874b4ab4..de5521302 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -584,7 +584,7 @@ task download_from_url { # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) #File? downloaded_response_file_debug = read_string("FILE_LOCATION") #File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - File? downloaded_response_file = read_string("FILE_LOCATION") + File? downloaded_response_file = select_first([glob(download_subdir_local+"/*")[0],""]) #read_string("FILE_LOCATION") String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download File? 
downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder From 50b226c0968c0a8512cf17c52a5355c3141d46dd Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:30:49 -0400 Subject: [PATCH 20/26] debug --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index de5521302..f6c6fc3f2 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -584,7 +584,7 @@ task download_from_url { # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) #File? downloaded_response_file_debug = read_string("FILE_LOCATION") #File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - File? downloaded_response_file = select_first([glob(download_subdir_local+"/*")[0],""]) #read_string("FILE_LOCATION") + File? downloaded_response_file = select_first(flatten([glob(download_subdir_local+"/*"),[""]])) #read_string("FILE_LOCATION") String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download File? 
downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder From b5dc3b0a581781ee0107cebdcb04d1cb84549458 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:44:45 -0400 Subject: [PATCH 21/26] cruft removal --- pipes/WDL/tasks/tasks_utils.wdl | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index f6c6fc3f2..9d5352855 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -542,25 +542,12 @@ task download_from_url { # report the file size, in bytes printf "Downloaded file size (bytes): " && stat --format=%s "~{download_subdir_local}/${downloaded_file_name}" | tee SIZE_OF_DOWNLOADED_FILE_BYTES echo "true" > WAS_HTTP_DOWNLOAD - downloaded_file_realpath=$(realpath "~{download_subdir_local}/${downloaded_file_name}") - - echo '~{download_subdir_local}/${downloaded_file_name}: '"~{download_subdir_local}/${downloaded_file_name}" - echo '${downloaded_file_realpath}: '"${downloaded_file_realpath}" - - #echo "${downloaded_file_realpath}" | tee FILE_LOCATION echo "~{download_subdir_local}/${downloaded_file_name}" | tee FILE_LOCATION - - echo "ls -lah $(pwd)" - ls -lah - - echo "ls -lah $(pwd)/~{download_subdir_local}" - ls -lah ~{download_subdir_local} else echo "Only URLs beginning with 'http://' or 'https://' can be downloaded; passing through input url to directly to output..." 
- #echo "~{url_to_download}" > FILE_LOCATION - echo "" > FILE_LOCATION - printf "0" > SIZE_OF_DOWNLOADED_FILE_BYTES - printf "" > MD5_SUM_OF_DOWNLOADED_FILE + echo "" > FILE_LOCATION + printf "0" > SIZE_OF_DOWNLOADED_FILE_BYTES + printf "" > MD5_SUM_OF_DOWNLOADED_FILE echo "false" > WAS_HTTP_DOWNLOAD fi >>> From e3dc5e389e8dc931d2e4952a5667027c51cef04c Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:49:01 -0400 Subject: [PATCH 22/26] empty string coersion to optional File? does not seem to work on Terra --- pipes/WDL/tasks/tasks_utils.wdl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 9d5352855..dae97b9d6 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -394,6 +394,7 @@ task download_from_url { # Do not use these inputs; they are placeholders to output null until WDL supports null literals Int? nullIntPlaceholder String? nullStrPlaceholder + File? nullFilePlaceholder } parameter_meta { @@ -571,7 +572,7 @@ task download_from_url { # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) #File? downloaded_response_file_debug = read_string("FILE_LOCATION") #File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - File? downloaded_response_file = select_first(flatten([glob(download_subdir_local+"/*"),[""]])) #read_string("FILE_LOCATION") + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) select_first(flatten([glob(download_subdir_local+"/*"),[""]])) else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download File? 
downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder From b6fa0c352235afaa5dd3746d35f246dca4aa02dc Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 03:50:08 -0400 Subject: [PATCH 23/26] fix conditional --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index dae97b9d6..77ec67d44 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -572,7 +572,7 @@ task download_from_url { # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) #File? downloaded_response_file_debug = read_string("FILE_LOCATION") #File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) select_first(flatten([glob(download_subdir_local+"/*"),[""]])) else nullStrPlaceholder + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then select_first(flatten([glob(download_subdir_local+"/*"),[""]])) else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download File? downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder From c6acdc744c3e2a400c005605645b475eeba7ba08 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 13:09:04 -0400 Subject: [PATCH 24/26] include an optional File? in flatten() call to appease dxWDL include an optional File? 
in flatten() call to appease dxWDL and prevent the error: ```Failed to process task definition 'download_from_url' (reason 1 of 1): Failed to process expression 'if read_boolean("WAS_HTTP_DOWNLOAD") then select_first(flatten([glob((download_subdir_local + "/*")), [""]])) else nullStrPlaceholder' (reason 1 of 1): Invalid parameter 'Flatten(ArrayLiteral(Vector(Glob(Add(IdentifierLookup(download_subdir_local),StringLiteral(/*))), ArrayLiteral(Vector(StringLiteral())))))'. Expected an array of optional values (eg 'Array[X?]') but got 'Array[String]')``` --- pipes/WDL/tasks/tasks_utils.wdl | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 77ec67d44..8c8756f93 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -438,7 +438,7 @@ task download_from_url { # if this is an http[s] url, download the file # (otherwise just pass through the URL to the 'path_str' output) - if [[ ("~{url_to_download}" =~ ^(http|https|drs):// ) ]]; then + if [[ ("~{url_to_download}" =~ ^(http|https|ftp):// ) ]]; then mkdir -p "~{download_subdir_local}/tmp" pushd "~{download_subdir_local}" @@ -485,7 +485,7 @@ task download_from_url { popd # return to downloaded/ - # (only for http(s)) split http response headers from response body + # (only for http[s]) split http response headers from response body # since wget stores both in a single file separated by a couple newlines if [[ "~{url_to_download}" =~ ^https?:// ]] && ~{if save_response_header_to_file then "true" else "false"}; then echo "Saving response headers separately..." @@ -572,7 +572,16 @@ task download_from_url { # select_first([download_from_url.downloaded_response_file, download_from_url.passthrough_url]) #File? downloaded_response_file_debug = read_string("FILE_LOCATION") #File? 
downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then read_string("FILE_LOCATION") else nullStrPlaceholder - File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then select_first(flatten([glob(download_subdir_local+"/*"),[""]])) else nullStrPlaceholder + + + #File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then glob(download_subdir_local+"/*") else nullStrPlaceholder + + File? downloaded_response_file = if (read_boolean("WAS_HTTP_DOWNLOAD")) then select_first( + flatten([ + glob(download_subdir_local+"/*"), + ["",nullStrPlaceholder] + ]) + ) else nullStrPlaceholder String? passthrough_url = if (read_boolean("WAS_HTTP_DOWNLOAD")) then nullStrPlaceholder else url_to_download File? downloaded_response_headers = if ( defined(downloaded_response_file) && save_response_header_to_file ) then basename(read_string("FILE_LOCATION")) + ".headers" else nullStrPlaceholder From 4234f03dc6c01c6fe7430e7e0d5407a27e30b8a2 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 13:13:48 -0400 Subject: [PATCH 25/26] add comment about WDL 1.1 and "None" --- pipes/WDL/tasks/tasks_utils.wdl | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index 8c8756f93..eeb5924f0 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -391,7 +391,10 @@ task download_from_url { Int disk_size = 50 - # Do not use these inputs; they are placeholders to output null until WDL supports null literals + # Do not use these inputs; they are placeholders to output null values until + # cromwell-on-Terra supports the null literal "None", available starting in WDL 1.1 + # see: + # https://github.com/openwdl/wdl/blob/wdl-1.1/SPEC.md#optional-types-and-none Int? nullIntPlaceholder String? nullStrPlaceholder File? 
nullFilePlaceholder From 54bc38fb4a985b71fbede9ad23ee0de0b40337e3 Mon Sep 17 00:00:00 2001 From: Christopher Tomkins-Tinch Date: Wed, 26 Mar 2025 13:16:38 -0400 Subject: [PATCH 26/26] comment --- pipes/WDL/tasks/tasks_utils.wdl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_utils.wdl b/pipes/WDL/tasks/tasks_utils.wdl index eeb5924f0..bee0aee82 100644 --- a/pipes/WDL/tasks/tasks_utils.wdl +++ b/pipes/WDL/tasks/tasks_utils.wdl @@ -392,7 +392,7 @@ task download_from_url { Int disk_size = 50 # Do not use these inputs; they are placeholders to output null values until - # cromwell-on-Terra supports the null literal "None", available starting in WDL 1.1 + # cromwell-on-Terra supports the null literal "None" available in WDL version >1.1 # see: # https://github.com/openwdl/wdl/blob/wdl-1.1/SPEC.md#optional-types-and-none Int? nullIntPlaceholder