Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions data_structures/flag_filter.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ struct FlagFilter {
String exclude_if_all # samtools -G
}

#@ except: EmptyOutputs
task validate_string_is_12bit_int {
meta {
description: "Validates that a string is an octal, decimal, or hexadecimal number and less than 2^12."
Expand Down
1 change: 1 addition & 0 deletions data_structures/read_group.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ task get_read_groups {
}
}

#@ except: EmptyOutputs
task validate_read_group {
meta {
description: "Validate a `ReadGroup` struct's fields are defined and well-formed"
Expand Down
1 change: 1 addition & 0 deletions tools/fq.wdl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## [Homepage](https://github.com/stjude-rust-labs/fq)
version 1.1

#@ except: EmptyOutputs
task fqlint {
meta {
description: "Performs quality control on the input FASTQs to ensure proper formatting"
Expand Down
1 change: 1 addition & 0 deletions tools/samtools.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ version 1.1

import "../data_structures/flag_filter.wdl"

#@ except: EmptyOutputs
task quickcheck {
meta {
description: "Runs Samtools quickcheck on the input BAM file."
Expand Down
2 changes: 2 additions & 0 deletions tools/util.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ task calc_feature_lengths {
}
}

#@ except: EmptyOutputs
task compression_integrity {
meta {
description: "Checks the compression integrity of a bgzipped file"
Expand Down Expand Up @@ -358,6 +359,7 @@ task global_phred_scores {
}
}

#@ except: EmptyOutputs
task check_fastq_and_rg_concordance {
meta {
description: "Validates FASTQs and read group records are concordant"
Expand Down
1 change: 1 addition & 0 deletions workflows/dnaseq/dnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ workflow dnaseq_standard_experimental {
}
}

#@ except: EmptyOutputs
task parse_input {
meta {
description: "Parses and validates the `dnaseq_standard` workflow's provided inputs"
Expand Down
6 changes: 6 additions & 0 deletions workflows/qc/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](http://keepachangelog.com/).

## 2026 May

### Added

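- `quality_check_standard` workflow: optional FASTQ analysis via new input `run_fastq_analysis` (defaults to `true`). Setting it to `false` skips BAM-to-FASTQ conversion and the FASTQ-level tools (fqlint, Kraken2, fastp, librarian, comparative Kraken2), and also disables `qualimap_rnaseq`, which requires the collated BAM produced by `bam_to_fastq` ([#315](https://github.com/stjudecloud/workflows/pull/315))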

## 2025 September

### Changed
Expand Down
199 changes: 104 additions & 95 deletions workflows/qc/quality-check-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ workflow quality_check_standard {
description: "Only process a random sampling of approximately `n` reads. Use any `n <= 0` to process the entire input.",
warning: "Subsampling is done probabilistically so the exact number of reads in the output will have some variation.",
}
run_fastq_analysis: {
description: "Create FASTQs from the input BAM and run FASTQ-level analyses?",
help: "If false, the pipeline skips SAMtools bam-to-fastq, fqlint, Kraken2, fastp, librarian, and comparative Kraken2. Also disables qualimap_rnaseq (requires a collated BAM from bam_to_fastq).",
}
}

input {
Expand Down Expand Up @@ -153,6 +157,7 @@ workflow quality_check_standard {
Boolean use_all_cores = false
Int optical_distance = 0
Int subsample_n_reads = -1
Boolean run_fastq_analysis = true
Comment thread
adthrasher marked this conversation as resolved.
Outdated
}

call parse_input { input:
Expand All @@ -164,7 +169,7 @@ workflow quality_check_standard {
call flag_filter.validate_flag_filter as kraken_filter_validator { input:
flags = standard_filter,
}
if (run_comparative_kraken) {
if (run_comparative_kraken && run_fastq_analysis) {
call flag_filter.validate_flag_filter as comparative_kraken_filter_validator { input:
flags = comparative_filter,
}
Expand Down Expand Up @@ -254,109 +259,111 @@ workflow quality_check_standard {
prefix = post_subsample_prefix,
}

call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input:
bam = post_subsample_bam,
bitwise_filter = standard_filter,
prefix = post_subsample_prefix,
# RNA needs a collated BAM for Qualimap
# DNA can skip the associated storage costs
retain_collated_bam = rna,
# disabling fast_mode enables writing of secondary and supplementary alignments
# to the collated BAM when processing RNA.
# Those alignments are used downstream by Qualimap.
fast_mode = (!rna),
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}

call fq.fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken after fqlint { input:
read_one_fastq_gz = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix,
use_all_cores,
}
if (run_fastp) {
call fp.fastp after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
output_fastq = false,
}
}
if (run_librarian) {
call libraran_tasks.librarian after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
}
}

if (run_comparative_kraken) {
call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator {
input:
if (run_fastq_analysis) {
call samtools.bam_to_fastq after quickcheck after kraken_filter_validator { input:
bam = post_subsample_bam,
bitwise_filter = comparative_filter,
prefix = post_subsample_prefix + ".alt_filtered",
# matches default but prevents user from overriding
# If the user wants a collated BAM, they should save the one
# from the first bam_to_fastq call.
retain_collated_bam = false,
# matches default but prevents user from overriding
# Since the only output here is FASTQs, we can enable fast mode.
# This discards secondary and supplementary alignments, which should not
# be converted to FASTQs. (Is that true?)
fast_mode = true,
bitwise_filter = standard_filter,
prefix = post_subsample_prefix,
# RNA needs a collated BAM for Qualimap
# DNA can skip the associated storage costs
retain_collated_bam = rna,
# disabling fast_mode enables writing of secondary and supplementary alignments
# to the collated BAM when processing RNA.
# Those alignments are used downstream by Qualimap.
fast_mode = (!rna),
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}
call fq.fqlint as alt_filtered_fqlint { input:

call fq.fqlint { input:
read_one_fastq = select_first([
alt_filtered_fastq.read_one_fastq_gz,
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
alt_filtered_fastq.read_two_fastq_gz,
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input:
call kraken2.kraken after fqlint { input:
read_one_fastq_gz = select_first([
alt_filtered_fastq.read_one_fastq_gz,
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
alt_filtered_fastq.read_two_fastq_gz,
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix + ".alt_filtered",
prefix = post_subsample_prefix,
use_all_cores,
}
if (run_fastp) {
call fp.fastp after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
bam_to_fastq.read_two_fastq_gz,
"undefined",
]),
output_fastq = false,
}
}
if (run_librarian) {
call libraran_tasks.librarian after fqlint { input:
read_one_fastq = select_first([
bam_to_fastq.read_one_fastq_gz,
"undefined",
]),
}
}

if (run_comparative_kraken) {
call samtools.bam_to_fastq as alt_filtered_fastq after quickcheck after comparative_kraken_filter_validator {
input:
bam = post_subsample_bam,
bitwise_filter = comparative_filter,
prefix = post_subsample_prefix + ".alt_filtered",
# matches default but prevents user from overriding
# If the user wants a collated BAM, they should save the one
# from the first bam_to_fastq call.
retain_collated_bam = false,
# matches default but prevents user from overriding
# Since the only output here is FASTQs, we can enable fast mode.
# This discards secondary and supplementary alignments, which should not
# be converted to FASTQs. (Is that true?)
fast_mode = true,
paired_end = true, # matches default but prevents user from overriding
use_all_cores,
}
call fq.fqlint as alt_filtered_fqlint { input:
read_one_fastq = select_first([
alt_filtered_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq = select_first([
alt_filtered_fastq.read_two_fastq_gz,
"undefined",
]),
}
call kraken2.kraken as comparative_kraken after alt_filtered_fqlint { input:
read_one_fastq_gz = select_first([
alt_filtered_fastq.read_one_fastq_gz,
"undefined",
]),
read_two_fastq_gz = select_first([
alt_filtered_fastq.read_two_fastq_gz,
"undefined",
]),
db = kraken_db,
store_sequences = store_kraken_sequences,
prefix = post_subsample_prefix + ".alt_filtered",
use_all_cores,
}
}
}

call mosdepth.coverage as wg_coverage after quickcheck { input:
Expand Down Expand Up @@ -392,18 +399,20 @@ workflow quality_check_standard {
]),
outfile_name = post_subsample_prefix + ".strandedness.tsv",
}
call qualimap.rnaseq as qualimap_rnaseq { input:
bam = select_first([
bam_to_fastq.collated_bam,
"undefined",
]),
prefix = post_subsample_prefix + ".qualimap_rnaseq_results",
gtf = select_first([
gtf,
"undefined",
]),
name_sorted = true,
paired_end = true, # matches default but prevents user from overriding
if (run_fastq_analysis) {
call qualimap.rnaseq as qualimap_rnaseq { input:
bam = select_first([
bam_to_fastq.collated_bam,
"undefined",
]),
prefix = post_subsample_prefix + ".qualimap_rnaseq_results",
gtf = select_first([
gtf,
"undefined",
]),
name_sorted = true,
paired_end = true, # matches default but prevents user from overriding
}
}
}
if (mark_duplicates) {
Expand Down
1 change: 1 addition & 0 deletions workflows/rnaseq/rnaseq-standard.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ workflow rnaseq_standard {
}
}

#@ except: EmptyOutputs
task parse_input {
meta {
description: "Parses and validates the `rnaseq_standard[_fastq]` workflows' provided inputs"
Expand Down
Loading