From d0d3a7b4a5d23d6ebfcf09a1a4f1331edafa4f18 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 19 Apr 2024 15:48:21 -0700 Subject: [PATCH 01/45] process by readgroup in progress --- config/methods.config | 4 ++++ main.nf | 5 ++++- module/bamqc_qualimap.nf | 2 +- module/collectWgsMetrics_picard.nf | 2 +- module/stats_samtools.nf | 7 ++++++- 5 files changed, 16 insertions(+), 4 deletions(-) diff --git a/config/methods.config b/config/methods.config index ae8786b..12ac3d5 100644 --- a/config/methods.config +++ b/config/methods.config @@ -19,9 +19,13 @@ methods { throw new Exception("Sample ${sm_tags[0]} was found in multiple BAMs. Please provide only one BAM per sample") } new_sm_tag = methods.sanitize_uclahs_cds_id(sm_tags[0]) + def rg_ids = bam_header['read_group'].collect{ it['ID'] } + def lb_ids = bam_header['read_group'].collect{ it['LB'] }.unique() params.samples_to_process.add([ 'orig_id': sm_tags[0], 'id': new_sm_tag, + 'read_groups': rg_ids, + 'libraries': lb_ids, 'path': bam_path, 'read_length': sampleMap.getOrDefault('read_length', null), 'sample_type': k diff --git a/main.nf b/main.nf index 74124bb..6e7f307 100755 --- a/main.nf +++ b/main.nf @@ -84,10 +84,13 @@ log.info """\ bamqc_additional_options: ${params.bamqc_additional_options} """ +// CHANGE: SAMPLES_TO_PROCESS by read_group. Call stats 3 times, once by readgroup, once by library and once by sample +// - first check if more than one read group/library +// - picard and qualimap will be called by sample Channel .fromList(params.samples_to_process) .map { sample -> - return tuple(sample.orig_id, sample.id, sample.path, sample.read_length, sample.sample_type) + return tuple(sample.orig_id, sample.id, sample.read_groups, sample.libraries, sample.path, sample.read_length, sample.sample_type) } .set { samplesToProcessChannel } diff --git a/module/bamqc_qualimap.nf b/module/bamqc_qualimap.nf index a9c7fd2..7ccfbd6 100644 --- a/module/bamqc_qualimap.nf +++ b/module/bamqc_qualimap.nf @@ -19,7 +19,7 @@ process run_bamqc_Qualimap { saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) + tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) output: path "*_stats", emit: stats diff --git a/module/collectWgsMetrics_picard.nf b/module/collectWgsMetrics_picard.nf index dfcac96..32b0ae4 100644 --- a/module/collectWgsMetrics_picard.nf +++ b/module/collectWgsMetrics_picard.nf @@ -20,7 +20,7 @@ process run_CollectWgsMetrics_Picard { saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) + tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) path reference path reference_index diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index 73e15e7..a81e2c4 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -20,7 +20,7 @@ process run_stats_SAMtools { saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) + tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) output: path "*stats.txt" @@ -33,8 +33,13 @@ process run_stats_SAMtools { [:]) rmdups = params.samtools_remove_duplicates ? "--remove-dups" : "" +// CHANGE: SAMPLES_TO_PROCESS by read_group. Call this process 3 times, once by readgroup, once by library and once by sample + read_groups_string = read_groups.join(" ") + println "read_groups_string: ${read_groups_string}" + """ set -euo pipefail samtools stats ${rmdups} ${params.samtools_stats_additional_options} ${path} > ${output_filename}_stats.txt + echo \${read_groups_string} """ } From adb05915daa9555af85e7c0818484d6e3924d8ec Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 10 Jun 2024 14:18:55 -0700 Subject: [PATCH 02/45] standardize algorithms to algorithm --- README.md | 2 +- config/schema.yaml | 2 +- config/template.config | 2 +- test/config/all-tools.config | 2 +- test/config/bamqc.config | 2 +- test/config/cwm.config | 2 +- test/config/stats.config | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 466604c..1edd32f 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ input: | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| `algorithms` | list | no | List of tools to be run: ['stats', 'collectwgsmetrics', 'bamqc'], default = ['stats', 'collectwgsmetrics'] | +| `algorithm` | list | no | List of tools to be run: ['stats', 'collectwgsmetrics', 'bamqc'], default = ['stats', 'collectwgsmetrics'] | | `reference` | path | yes/no | Reference fasta is required only for `CollectWgsMetrics` | | `output_dir` | path | yes | Not required if `blcds_registered_dataset` = `true` | | `blcds_registered_dataset` | boolean | no | Default is `false`. Only `uclahs_cds` users should change this. When `true`, BLCDS folder structure is used | diff --git a/config/schema.yaml b/config/schema.yaml index 25c226a..d43815b 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -7,7 +7,7 @@ dataset_id: type: 'String' required: true help: 'Dataset identifier' -algorithms: +algorithm: type: 'List' required: false help: 'List of QC algorithms' diff --git a/config/template.config b/config/template.config index 17057d3..10130c5 100644 --- a/config/template.config +++ b/config/template.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['stats', 'collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['stats', 'collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' output_dir = '/path/to/output/directory' blcds_registered_dataset = false // if you want the output to be registered diff --git a/test/config/all-tools.config b/test/config/all-tools.config index 0e0078b..a191b5f 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['stats', 'collectwgsmetrics', 'bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['stats', 'collectwgsmetrics', 'bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true diff --git a/test/config/bamqc.config b/test/config/bamqc.config index 8325987..027097b 100644 --- a/test/config/bamqc.config +++ b/test/config/bamqc.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true diff --git a/test/config/cwm.config b/test/config/cwm.config index 2ac66c1..0275062 100644 --- a/test/config/cwm.config +++ b/test/config/cwm.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true diff --git a/test/config/stats.config b/test/config/stats.config index c1043df..f53ec65 100644 --- a/test/config/stats.config +++ b/test/config/stats.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['stats'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['stats'] // 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true From ad00e21a076d7b266d920c6f26a50fc80b8948c6 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 11 Jun 2024 14:06:55 -0700 Subject: [PATCH 03/45] samtools stats by readgroup --- config/F16.config | 26 +++++++++++-- config/F2.config | 22 ++++++++++- config/F32.config | 26 +++++++++++-- config/F4.config | 26 +++++++++++-- config/F72.config | 26 +++++++++++-- config/F8.config | 26 +++++++++++-- config/M64.config | 26 +++++++++++-- config/methods.config | 36 ++++++++++++++---- main.nf | 61 ++++++++++++++++++++---------- module/bamqc_qualimap.nf | 6 +-- module/collectWgsMetrics_picard.nf | 6 +-- module/stats_samtools.nf | 44 ++++++++++++++------- 12 files changed, 266 insertions(+), 65 deletions(-) diff --git a/config/F16.config b/config/F16.config index a7d224d..eafc2d5 100644 --- a/config/F16.config +++ b/config/F16.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 4.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/F2.config b/config/F2.config index 23f7522..8b3f4bb 100644 --- a/config/F2.config +++ b/config/F2.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { cpus = 1 memory = 1500.MB retry_strategy { diff --git a/config/F32.config b/config/F32.config index 3cc91f2..f97166c 100644 --- a/config/F32.config +++ b/config/F32.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 4.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/F4.config b/config/F4.config index 43b09ac..53ffc4e 100644 --- a/config/F4.config +++ b/config/F4.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 3.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/F72.config b/config/F72.config index 34a7fd8..ddf0c23 100644 --- a/config/F72.config +++ b/config/F72.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 4.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/F8.config b/config/F8.config index 8e2ac1e..d855406 100644 --- a/config/F8.config +++ b/config/F8.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 4.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/M64.config b/config/M64.config index f84f915..c36b758 100644 --- a/config/M64.config +++ b/config/M64.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: run_stats_SAMtools { + withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1.GB + memory = 1500.MB retry_strategy { memory { strategy = 'add' - operand = 4.GB + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: run_stats_SAMtools_sample { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB } } } diff --git a/config/methods.config b/config/methods.config index 12ac3d5..2058e40 100644 --- a/config/methods.config +++ b/config/methods.config @@ -7,6 +7,8 @@ includeConfig "${projectDir}/external/pipeline-Nextflow-config/config/retry/retr methods { get_ids_from_bams = { params.samples_to_process = [] as Set + params.readgroups_to_process = [] as Set + params.libraries_to_process = [] as Set params.input['BAM'].each { k, v -> v.each { sampleMap -> def bam_path = sampleMap['path'] @@ -18,18 +20,38 @@ methods { if (params.samples_to_process.any { it.orig_id == sm_tags[0] }) { throw new Exception("Sample ${sm_tags[0]} was found in multiple BAMs. Please provide only one BAM per sample") } - new_sm_tag = methods.sanitize_uclahs_cds_id(sm_tags[0]) - def rg_ids = bam_header['read_group'].collect{ it['ID'] } - def lb_ids = bam_header['read_group'].collect{ it['LB'] }.unique() + def new_sm_tag = methods.sanitize_uclahs_cds_id(sm_tags[0]) params.samples_to_process.add([ - 'orig_id': sm_tags[0], - 'id': new_sm_tag, - 'read_groups': rg_ids, - 'libraries': lb_ids, 'path': bam_path, + 'orig_id': sm_tags[0], + 'sm_id': new_sm_tag, 'read_length': sampleMap.getOrDefault('read_length', null), 'sample_type': k ]) + if (bam_header['read_group'].collect{ it['LB'] }.size() > 1) { + bam_header['read_group'].collect{ it['LB'] }.unique().each { lib -> + def lib_id = methods.sanitize_uclahs_cds_id(lib) + def rgs = bam_header['read_group'].findAll{ it['LB'] == lib }.collect{ it['ID'] } + params.libraries_to_process.add([ + 'path': bam_path, + 'sm_id': new_sm_tag, + 'rgs': rgs, + 'lib_id': lib_id + ]) + } + } + if (bam_header['read_group'].collect{ it['ID'] }.size() > 1) { + bam_header['read_group'].each { rgMap -> + def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) + params.readgroups_to_process.add([ + 'path': bam_path, + 'sm_id': new_sm_tag, + 'orig_rg_id': rgMap['ID'], + 'rg_id': rg_id, + 'lib_id': rgMap['LB'] + ]) + } + } } } } diff --git a/main.nf b/main.nf index 6e7f307..97eeba6 100755 --- a/main.nf +++ b/main.nf @@ -7,7 +7,7 @@ include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modul main_process: "./" //Save logs in /process-log/run_validate_PipeVal ] ) -include { run_stats_SAMtools } from './module/stats_samtools' addParams( +include { run_stats_SAMtools as run_stats_SAMtools_readgroup; run_stats_SAMtools as run_stats_SAMtools_library; run_stats_SAMtools as run_stats_SAMtools_sample } from './module/stats_samtools' addParams( workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}" ) @@ -41,7 +41,7 @@ log.info """\ qualimap: ${params.docker_image_qualimap} - input: - algorithm(s): ${params.algorithms} + algorithm(s): ${params.algorithm} dataset_id: ${params.dataset_id} patient_id: ${params.patient_id} tumor: ${params.samples_to_process.findAll{ it.sample_type == 'tumor' }['path']} @@ -52,9 +52,9 @@ log.info """\ - sample names extracted from input BAM files and sanitized: tumor in: ${params.samples_to_process.findAll{ it.sample_type == 'tumor' }['orig_id']} - tumor out: ${params.samples_to_process.findAll{ it.sample_type == 'tumor' }['id']} + tumor out: ${params.samples_to_process.findAll{ it.sample_type == 'tumor' }['sm_id']} normal in: ${params.samples_to_process.findAll{ it.sample_type == 'normal' }['orig_id']} - normal out: ${params.samples_to_process.findAll{ it.sample_type == 'normal' }['id']} + normal out: ${params.samples_to_process.findAll{ it.sample_type == 'normal' }['sm_id']} - output: output_dir: ${params.output_dir_base} @@ -84,15 +84,29 @@ log.info """\ bamqc_additional_options: ${params.bamqc_additional_options} """ -// CHANGE: SAMPLES_TO_PROCESS by read_group. Call stats 3 times, once by readgroup, once by library and once by sample -// - first check if more than one read group/library -// - picard and qualimap will be called by sample Channel .fromList(params.samples_to_process) - .map { sample -> - return tuple(sample.orig_id, sample.id, sample.read_groups, sample.libraries, sample.path, sample.read_length, sample.sample_type) + .map { sm -> + def rg_arg = "" + return tuple(sm.path, sm.orig_id, sm.sm_id, rg_arg, null, null, sm.sample_type, sm.read_length) } - .set { samplesToProcessChannel } + .set { samples_to_process_ch } + +Channel + .fromList(params.libraries_to_process) + .map { lib -> + def rg_arg = lib.rgs.collect { "-r ${it}" }.join(' ') + return tuple(lib.path, null, lib.sm_id, rg_arg, null, lib.lib_id, null, null) + } + .set { libraries_to_process_ch } + +Channel + .fromList(params.readgroups_to_process) + .map { rg -> + def rg_arg = "-r ${rg.orig_rg_id}" + return tuple(rg.path, null, rg.sm_id, rg_arg, rg.rg_id, rg.lib_id, null, null) + } + .set { readgroups_to_process_ch } Channel .fromList(params.samples_to_process) @@ -100,7 +114,7 @@ Channel .flatten() .set { files_to_validate_ch } -if ('collectwgsmetrics' in params.algorithms) { +if ('collectwgsmetrics' in params.algorithm) { if (!params.reference) { throw new Exception("Reference genome is required when using the 'collectwgsmetrics' algorithm. Please check the config file and try again.") } @@ -124,22 +138,31 @@ workflow { storeDir: "${params.output_dir_base}/validation" ) - if ('stats' in params.algorithms) { - run_stats_SAMtools( - samplesToProcessChannel + if ('stats' in params.algorithm) { + if (params.readgroups_to_process.size() > 0) { + run_stats_SAMtools_readgroup( + readgroups_to_process_ch + ) + } + if (params.libraries_to_process.size() > 0) { + run_stats_SAMtools_library( + libraries_to_process_ch + ) + } + run_stats_SAMtools_sample( + samples_to_process_ch ) } - - if ('collectwgsmetrics' in params.algorithms) { + if ('collectwgsmetrics' in params.algorithm) { run_CollectWgsMetrics_Picard( - samplesToProcessChannel, + samples_to_process_ch, params.reference, params.reference_index ) } - if ('bamqc' in params.algorithms) { + if ('bamqc' in params.algorithm) { run_bamqc_Qualimap( - samplesToProcessChannel, + readgroups_to_process_ch, ) } } diff --git a/module/bamqc_qualimap.nf b/module/bamqc_qualimap.nf index 7ccfbd6..4ba1eb8 100644 --- a/module/bamqc_qualimap.nf +++ b/module/bamqc_qualimap.nf @@ -16,10 +16,10 @@ process run_bamqc_Qualimap { publishDir path: "${params.workflow_log_output_dir}", pattern: ".command.*", mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } + saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) + tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(unused) output: path "*_stats", emit: stats @@ -28,7 +28,7 @@ process run_bamqc_Qualimap { script: output_filename = generate_standard_filename("Qualimap-${params.qualimap_version}", params.dataset_id, - id, + sm_id, [:]) """ diff --git a/module/collectWgsMetrics_picard.nf b/module/collectWgsMetrics_picard.nf index 32b0ae4..8575f16 100644 --- a/module/collectWgsMetrics_picard.nf +++ b/module/collectWgsMetrics_picard.nf @@ -17,10 +17,10 @@ process run_CollectWgsMetrics_Picard { publishDir path: "${params.workflow_log_output_dir}", pattern: ".command.*", mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } + saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) + tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(read_length) path reference path reference_index @@ -31,7 +31,7 @@ process run_CollectWgsMetrics_Picard { script: output_filename = generate_standard_filename("Picard-${params.picard_version}", params.dataset_id, - id, + sm_id, [:]) read_length_arg = read_length ? "-READ_LENGTH ${read_length}" : "" fast_algorithm_arg = params.cwm_use_fast_algorithm ? "-USE_FAST_ALGORITHM" : "" diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index a81e2c4..ebbe161 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -4,6 +4,7 @@ * */ + include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' process run_stats_SAMtools { @@ -12,34 +13,49 @@ process run_stats_SAMtools { publishDir path: "${params.workflow_output_dir}/output", pattern: "*stats.txt", mode: "copy", - enabled: true + enabled: true, + saveAs: { "${outdir}/${file(it).getName()}" } publishDir path: "${params.workflow_log_output_dir}", pattern: ".command.*", mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } + saveAs: { "${task.process.replace(':', '/')}-${log_suffix}/log${file(it).getName()}" } input: - tuple val(orig_id), val(id), val(read_groups), path(path), val(read_length), val(sample_type) + tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(unused), val(unused) output: path "*stats.txt" path ".command.*" script: - output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", - params.dataset_id, - id, - [:]) - rmdups = params.samtools_remove_duplicates ? "--remove-dups" : "" + if (task.process == "run_stats_SAMtools_sample") { + output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + params.dataset_id, + sm_id, + [:]) + outdir = "." + log_suffix = "${sm_id}" + } else if (task.process == "run_stats_SAMtools_library") { + output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + params.dataset_id, + lib_id, + [:]) + outdir = sm_id + log_suffix = "${sm_id}/${lib_id}" + } else if (task.process == "run_stats_SAMtools_readgroup") { + output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + params.dataset_id, + rg_id, + [:]) + outdir = "${sm_id}/${lib_id}" + log_suffix = "${sm_id}/${lib_id}/${rg_id}" + } -// CHANGE: SAMPLES_TO_PROCESS by read_group. Call this process 3 times, once by readgroup, once by library and once by sample - read_groups_string = read_groups.join(" ") - println "read_groups_string: ${read_groups_string}" + rmdups = params.samtools_remove_duplicates ? "--remove-dups" : "" """ set -euo pipefail - samtools stats ${rmdups} ${params.samtools_stats_additional_options} ${path} > ${output_filename}_stats.txt - echo \${read_groups_string} + samtools view -h ${rg_arg} ${path} | samtools stats ${rmdups} ${params.samtools_stats_additional_options} > ${output_filename}_stats.txt """ -} +} \ No newline at end of file From 046ce0267d79af6d61d9512bf4c20054a42f13be Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 11 Jun 2024 17:38:32 -0700 Subject: [PATCH 04/45] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ebef6d0..935aaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add per readgroup and per library functionality - Add Nextflow version requirement to `README` ### Changed From 07b5458ac977cb6371252011db2d951bb5ecb76c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 11 Jun 2024 20:41:27 -0700 Subject: [PATCH 05/45] fix mislabel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 97eeba6..f0133a5 100755 --- a/main.nf +++ b/main.nf @@ -162,7 +162,7 @@ workflow { } if ('bamqc' in params.algorithm) { run_bamqc_Qualimap( - readgroups_to_process_ch, + samples_to_process_ch, ) } } From eb422cc27d2898a4a8bbc309ca7bf038e668adb4 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 11 Jun 2024 21:26:24 -0700 Subject: [PATCH 06/45] revert unintentional resource changes --- config/F16.config | 12 ++++++------ config/F32.config | 12 ++++++------ config/F4.config | 12 ++++++------ config/F72.config | 12 ++++++------ config/F8.config | 12 ++++++------ config/M64.config | 12 ++++++------ 6 files changed, 36 insertions(+), 36 deletions(-) diff --git a/config/F16.config b/config/F16.config index eafc2d5..41f2766 100644 --- a/config/F16.config +++ b/config/F16.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } diff --git a/config/F32.config b/config/F32.config index f97166c..2ec47c3 100644 --- a/config/F32.config +++ b/config/F32.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } diff --git a/config/F4.config b/config/F4.config index 53ffc4e..e12d98b 100644 --- a/config/F4.config +++ b/config/F4.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 3.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 3.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 3.GB } } } diff --git a/config/F72.config b/config/F72.config index ddf0c23..0220198 100644 --- a/config/F72.config +++ b/config/F72.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } diff --git a/config/F8.config b/config/F8.config index d855406..0d77405 100644 --- a/config/F8.config +++ b/config/F8.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } diff --git a/config/M64.config b/config/M64.config index c36b758..c66dd4c 100644 --- a/config/M64.config +++ b/config/M64.config @@ -5,31 +5,31 @@ process { } withName: run_stats_SAMtools_readgroup { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_library { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } withName: run_stats_SAMtools_sample { cpus = 1 - memory = 1500.MB + memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 2000.MB + operand = 4.GB } } } From ae43ccd9df20b34daa5de9bc74fa5412b36d0acb Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 11:57:38 -0700 Subject: [PATCH 07/45] add fastqc --- config/default.config | 2 ++ config/schema.yaml | 8 ++++++++ config/template.config | 5 ++++- main.nf | 14 ++++++++++++++ test/config/all-tools.config | 5 ++++- 5 files changed, 32 insertions(+), 2 deletions(-) diff --git a/config/default.config b/config/default.config index 265a5ca..49931df 100644 --- a/config/default.config +++ b/config/default.config @@ -17,9 +17,11 @@ params { // Docker images pipeval_version = "4.0.0-rc.2" samtools_version = "1.20" + fastqc_version = "0.12.1" picard_version = "3.1.0" qualimap_version = "2.3" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" + docker_image_fastqc = "${-> params.docker_container_registry}/fastqc:${params.fastqc_version}" docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}" docker_image_picard = "${-> params.docker_container_registry}/picard:${params.picard_version}" docker_image_qualimap = "${-> params.docker_container_registry}/qualimap:${params.qualimap_version}" diff --git a/config/schema.yaml b/config/schema.yaml index d43815b..d455dd5 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -12,10 +12,12 @@ algorithm: required: false help: 'List of QC algorithms' choices: + - fastqc - stats - collectwgsmetrics - bamqc default: + - fastqc - stats - collectwgsmetrics reference: @@ -38,6 +40,12 @@ save_intermediate_files: required: false default: false help: 'The option to save the intermediate files' +fastqc_additional_options: + type: 'String' + required: false + allow_empty: true + default: '' + help: 'Additional arguments for FastQC command' samtools_remove_duplicates: type: 'Bool' required: false diff --git a/config/template.config b/config/template.config index 10130c5..39b4196 100644 --- a/config/template.config +++ b/config/template.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithm = ['stats', 'collectwgsmetrics'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['fastqc', 'stats', 'collectwgsmetrics'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' output_dir = '/path/to/output/directory' blcds_registered_dataset = false // if you want the output to be registered @@ -29,6 +29,9 @@ params { bamqc_outformat = 'pdf' // 'html' or 'pdf' bamqc_additional_options = '' + // FastQC options + fastqc_additional_options = '' + // Base resource allocation updater // See README for adding parameters to update the base resource allocations } diff --git a/main.nf b/main.nf index f0133a5..f823b63 100755 --- a/main.nf +++ b/main.nf @@ -22,6 +22,11 @@ include { run_bamqc_Qualimap } from './module/bamqc_qualimap' addParams( workflow_log_output_dir: "${params.log_output_dir}/process-log/Qualimap-${params.qualimap_version}" ) +include { assess_ReadQuality_FastQC } from './module/fastqc' addParams( + workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}" + ) + include { indexFile } from './external/pipeline-Nextflow-module/modules/common/indexFile/main.nf' log.info """\ @@ -82,6 +87,10 @@ log.info """\ qualimap_version: ${params.qualimap_version} bamqc_outformat: ${params.bamqc_outformat} bamqc_additional_options: ${params.bamqc_additional_options} + + - FastQC options: + fastqc_version: ${params.fastqc_version} + fastqc_additional_options: ${params.fastqc_additional_options} """ Channel @@ -153,6 +162,11 @@ workflow { samples_to_process_ch ) } + if ('fastqc' in params.algorithms) { + assess_ReadQuality_FastQC( + samplesToProcessChannel + ) + } if ('collectwgsmetrics' in params.algorithm) { run_CollectWgsMetrics_Picard( samples_to_process_ch, diff --git a/test/config/all-tools.config b/test/config/all-tools.config index a191b5f..1767142 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithm = ['stats', 'collectwgsmetrics', 'bamqc'] // 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['fastqc', 'stats', 'collectwgsmetrics', 'bamqc'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true @@ -28,6 +28,9 @@ params { bamqc_outformat = 'pdf' bamqc_additional_options = '' + // FastQC options + fastqc_additional_options = '' + // Base resource allocation updater // See README for adding parameters to update the base resource allocations } From 68cf4b4c5cc8359f8a0e6aced29eb6a56753c4e5 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 12:17:25 -0700 Subject: [PATCH 08/45] add fastqc module --- module/fastqc.nf | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 module/fastqc.nf diff --git a/module/fastqc.nf b/module/fastqc.nf new file mode 100644 index 0000000..2e2668c --- /dev/null +++ b/module/fastqc.nf @@ -0,0 +1,46 @@ +/* +* Nextflow module for running FASTQC +* +* @input fq_path path path to the input FASTQ file +* @output fastqc_output_dir dir unzipped FASTQC output directory +*/ + +include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' + +process assess_ReadQuality_FastQC { + container params.docker_image_fastqc + + publishDir path: "${params.workflow_output_dir}/output", + pattern: "${output_filename}", + mode: "copy", + enabled: true + publishDir path: "${params.workflow_log_output_dir}", + pattern: ".command.*", + mode: "copy", + saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } + + input: + tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) + + output: + path("${output_filename}") + + script: + output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + params.dataset_id, + id, + [:]) + + """ + set -euo pipefail + mkdir "${output_filename}" + fastqc \ + --outdir "${output_filename}" \ + --threads ${task.cpus} \ + --format bam \ + --extract \ + --delete \ + ${params.fastqc_additional_options} \ + ${path} + """ +} From b738e5158fb30c0a8c0a7248a1db4013dde30237 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 12:49:45 -0700 Subject: [PATCH 09/45] use process_afterscript --- config/methods.config | 1 + module/bamqc_qualimap.nf | 5 +---- module/collectWgsMetrics_picard.nf | 5 +---- module/fastqc.nf | 5 +---- module/stats_samtools.nf | 5 +---- 5 files changed, 5 insertions(+), 16 deletions(-) diff --git a/config/methods.config b/config/methods.config index 2058e40..fd4ce41 100644 --- a/config/methods.config +++ b/config/methods.config @@ -98,5 +98,6 @@ methods { methods.set_output_dir() methods.set_pipeline_logs() methods.setup_docker_cpus() + methods.setup_process_afterscript() } } diff --git a/module/bamqc_qualimap.nf b/module/bamqc_qualimap.nf index 4ba1eb8..2c4a4b4 100644 --- a/module/bamqc_qualimap.nf +++ b/module/bamqc_qualimap.nf @@ -13,10 +13,7 @@ process run_bamqc_Qualimap { mode: "copy", enabled: true - publishDir path: "${params.workflow_log_output_dir}", - pattern: ".command.*", - mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" } + ext log_dir_suffix: { "-${sm_id}" } input: tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(unused) diff --git a/module/collectWgsMetrics_picard.nf b/module/collectWgsMetrics_picard.nf index 8575f16..9890313 100644 --- a/module/collectWgsMetrics_picard.nf +++ b/module/collectWgsMetrics_picard.nf @@ -14,10 +14,7 @@ process run_CollectWgsMetrics_Picard { mode: "copy", enabled: true - publishDir path: "${params.workflow_log_output_dir}", - pattern: ".command.*", - mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${sm_id}/log${file(it).getName()}" } + ext log_dir_suffix: { "-${sm_id}" } input: tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(read_length) diff --git a/module/fastqc.nf b/module/fastqc.nf index 2e2668c..71d6c96 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -14,10 +14,7 @@ process assess_ReadQuality_FastQC { pattern: "${output_filename}", mode: "copy", enabled: true - publishDir path: "${params.workflow_log_output_dir}", - pattern: ".command.*", - mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${id}/log${file(it).getName()}" } + ext log_dir_suffix: { "-${id}" } input: tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index ebbe161..0b76b10 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -16,10 +16,7 @@ process run_stats_SAMtools { enabled: true, saveAs: { "${outdir}/${file(it).getName()}" } - publishDir path: "${params.workflow_log_output_dir}", - pattern: ".command.*", - mode: "copy", - saveAs: { "${task.process.replace(':', '/')}-${log_suffix}/log${file(it).getName()}" } + ext log_dir_suffix: { "-${log_suffix}" } input: tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(unused), val(unused) From 27f1581d1ed880300c1a790ab80cfd4ea7c4b17c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 12:58:56 -0700 Subject: [PATCH 10/45] update changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 935aaa9..db3f906 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ## [Unreleased] ### Added +- Add FastQC workflow - Add per readgroup and per library functionality +- Add `process_afterscript` - Add Nextflow version requirement to `README` ### Changed From 1c922f9238e7611ec8f85a7395d88135ce787ca0 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 15:08:52 -0700 Subject: [PATCH 11/45] update nftest for fastqc --- CHANGELOG.md | 1 + module/fastqc.nf | 2 +- nftest.yml | 29 +++++++++++++++++++++++++++++ test/config/fastqc.config | 25 +++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 test/config/fastqc.config diff --git a/CHANGELOG.md b/CHANGELOG.md index db3f906..2d05d86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - Update SAMtools 1.18 to 1.20 +- Update NFTest for FastQC - Update repository/pipeline description - Update Nextflow configuration test workflows diff --git a/module/fastqc.nf b/module/fastqc.nf index 71d6c96..2f285d9 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -23,7 +23,7 @@ process assess_ReadQuality_FastQC { path("${output_filename}") script: - output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + output_filename = generate_standard_filename("FastQC-${params.fastqc_version}", params.dataset_id, id, [:]) diff --git a/nftest.yml b/nftest.yml index 1ab5df8..47ac36f 100644 --- a/nftest.yml +++ b/nftest.yml @@ -14,6 +14,12 @@ cases: skip: false verbose: true asserts: + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh @@ -40,6 +46,15 @@ cases: skip: false verbose: true asserts: + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_fastqc/fastqc_data.txt + script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh @@ -67,6 +82,20 @@ cases: - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt method: md5 + - name: a_mini-fastqc + message: test fastqc + nf_script: main.nf + nf_config: test/config/fastqc.config + params_file: test/yaml/a_mini.yaml + skip: true + verbose: true + asserts: + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + script: test/assert_txt.sh - name: a_mini-stats message: test samtools stats nf_script: main.nf diff --git a/test/config/fastqc.config b/test/config/fastqc.config new file mode 100644 index 0000000..d460f83 --- /dev/null +++ b/test/config/fastqc.config @@ -0,0 +1,25 @@ +// EXECUTION SETTINGS AND GLOBAL DEFAULTS + +// External config files import. DO NOT MODIFY THESE LINES! +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/config/methods.config" +includeConfig "${projectDir}/nextflow.config" + + +// Inputs/parameters of the pipeline +params { + algorithms = ['fastqc'] + reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' + blcds_registered_dataset = false // if you want the output to be registered + save_intermediate_files = true + + // SAMtools stats options + samtools_remove_duplicates = false + samtools_stats_additional_options = '' + + // Base resource allocation updater + // See README for adding parameters to update the base resource allocations +} + +// Setup the pipeline config. DO NOT REMOVE THIS LINE! +methods.setup() From dff3a7ae68a584a960ecefe6475db79c7932a785 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 15:18:27 -0700 Subject: [PATCH 12/45] fix nftest path --- nftest.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nftest.yml b/nftest.yml index 47ac36f..d6b8e15 100644 --- a/nftest.yml +++ b/nftest.yml @@ -52,8 +52,8 @@ cases: - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_SMadjust_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_SMadjust_fastqc/fastqc_data.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt From dbf3a01a63b0c71c811745cf1551c53140eacdd1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 12 Jun 2024 12:24:21 -0700 Subject: [PATCH 13/45] merge with sfitz-by-readgroup complete and tested --- main.nf | 4 ++-- module/fastqc.nf | 6 +++--- test/config/fastqc.config | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index f823b63..433de5c 100755 --- a/main.nf +++ b/main.nf @@ -162,9 +162,9 @@ workflow { samples_to_process_ch ) } - if ('fastqc' in params.algorithms) { + if ('fastqc' in params.algorithm) { assess_ReadQuality_FastQC( - samplesToProcessChannel + samples_to_process_ch ) } if ('collectwgsmetrics' in params.algorithm) { diff --git a/module/fastqc.nf b/module/fastqc.nf index 2f285d9..707d126 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -14,10 +14,10 @@ process assess_ReadQuality_FastQC { pattern: "${output_filename}", mode: "copy", enabled: true - ext log_dir_suffix: { "-${id}" } + ext log_dir_suffix: { "-${sm_id}" } input: - tuple val(orig_id), val(id), path(path), val(read_length), val(sample_type) + tuple path(path), val(unused), val(sm_id), val(rg_arg), val(unused), val(unused), val(unused), val(read_length) output: path("${output_filename}") @@ -25,7 +25,7 @@ process assess_ReadQuality_FastQC { script: output_filename = generate_standard_filename("FastQC-${params.fastqc_version}", params.dataset_id, - id, + sm_id, [:]) """ diff --git a/test/config/fastqc.config b/test/config/fastqc.config index d460f83..dce8944 100644 --- a/test/config/fastqc.config +++ b/test/config/fastqc.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithms = ['fastqc'] + algorithm = ['fastqc'] reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' blcds_registered_dataset = false // if you want the output to be registered save_intermediate_files = true From 9fd01e9e640698abe651bedcdb717176874bc677 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 13 Jun 2024 14:54:30 -0700 Subject: [PATCH 14/45] use fastqc docker with samtools --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index 49931df..cecc8e9 100644 --- a/config/default.config +++ b/config/default.config @@ -16,8 +16,8 @@ params { // Docker images pipeval_version = "4.0.0-rc.2" + fastqc_version = "0.12.1_samtools-1.20" samtools_version = "1.20" - fastqc_version = "0.12.1" picard_version = "3.1.0" qualimap_version = "2.3" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" From c5407f821b7bb7f581d69644f62ab286a586fec0 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 13 Jun 2024 14:57:29 -0700 Subject: [PATCH 15/45] fastqc by readgroup --- main.nf | 2 +- module/fastqc.nf | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/main.nf b/main.nf index 433de5c..cf56d3a 100755 --- a/main.nf +++ b/main.nf @@ -164,7 +164,7 @@ workflow { } if ('fastqc' in params.algorithm) { assess_ReadQuality_FastQC( - samples_to_process_ch + readgroups_to_process_ch ) } if ('collectwgsmetrics' in params.algorithm) { diff --git a/module/fastqc.nf b/module/fastqc.nf index 707d126..2f7400e 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -14,30 +14,32 @@ process assess_ReadQuality_FastQC { pattern: "${output_filename}", mode: "copy", enabled: true - ext log_dir_suffix: { "-${sm_id}" } + ext log_dir_suffix: { "-${target}" } input: - tuple path(path), val(unused), val(sm_id), val(rg_arg), val(unused), val(unused), val(unused), val(read_length) + tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(unused), val(unused), val(ununsed) output: path("${output_filename}") script: + target = "${sm_id}-${rg_id}" output_filename = generate_standard_filename("FastQC-${params.fastqc_version}", params.dataset_id, - sm_id, + target, [:]) """ set -euo pipefail mkdir "${output_filename}" - fastqc \ - --outdir "${output_filename}" \ - --threads ${task.cpus} \ - --format bam \ + samtools view -F 0x900 -h ${rg_arg} ${path} | \ + samtools fastq | \ + fastqc \ + --outdir "./" \ + --format fastq \ --extract \ --delete \ ${params.fastqc_additional_options} \ - ${path} + stdin:${output_filename} """ } From 8653bbc9887aca734a3ccd51430d5d2ad1c05681 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 13 Jun 2024 16:32:51 -0700 Subject: [PATCH 16/45] nftest paths updated --- nftest.yml | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/nftest.yml b/nftest.yml index d6b8e15..a60a746 100644 --- a/nftest.yml +++ b/nftest.yml @@ -14,11 +14,11 @@ cases: skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt @@ -46,14 +46,14 @@ cases: skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_SMadjust_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5.n1/S2.T-n1_SMadjust_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt @@ -90,11 +90,11 @@ cases: skip: true verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_HG002.N/HG002.N-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1_TWGSAMIN_S2-v1.1.5/S2.T-n2_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - name: a_mini-stats message: test samtools stats From 20085c3d95fe1fffbc99c1a9f7e55dd2a779771c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 14 Jun 2024 10:37:15 -0700 Subject: [PATCH 17/45] refactor channels --- config/methods.config | 38 +++++++++++++++++--------------------- main.nf | 21 +++++++++++++++++---- module/fastqc.nf | 8 ++++---- module/stats_samtools.nf | 25 ++++++++++--------------- 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/config/methods.config b/config/methods.config index fd4ce41..ec2fe38 100644 --- a/config/methods.config +++ b/config/methods.config @@ -28,29 +28,25 @@ methods { 'read_length': sampleMap.getOrDefault('read_length', null), 'sample_type': k ]) - if (bam_header['read_group'].collect{ it['LB'] }.size() > 1) { - bam_header['read_group'].collect{ it['LB'] }.unique().each { lib -> - def lib_id = methods.sanitize_uclahs_cds_id(lib) - def rgs = bam_header['read_group'].findAll{ it['LB'] == lib }.collect{ it['ID'] } - params.libraries_to_process.add([ - 'path': bam_path, - 'sm_id': new_sm_tag, - 'rgs': rgs, - 'lib_id': lib_id - ]) - } - } - if (bam_header['read_group'].collect{ it['ID'] }.size() > 1) { - bam_header['read_group'].each { rgMap -> - def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) - params.readgroups_to_process.add([ - 'path': bam_path, - 'sm_id': new_sm_tag, - 'orig_rg_id': rgMap['ID'], - 'rg_id': rg_id, - 'lib_id': rgMap['LB'] + bam_header['read_group'].collect{ it['LB'] }.unique().each { lib -> + def lib_id = methods.sanitize_uclahs_cds_id(lib) + def rgs = bam_header['read_group'].findAll{ it['LB'] == lib }.collect{ it['ID'] } + params.libraries_to_process.add([ + 'path': bam_path, + 'sm_id': new_sm_tag, + 'rgs': rgs, + 'lib_id': lib_id ]) } + bam_header['read_group'].each { rgMap -> + def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) + params.readgroups_to_process.add([ + 'path': bam_path, + 'sm_id': new_sm_tag, + 'orig_rg_id': rgMap['ID'], + 'rg_id': rg_id, + 'lib_id': rgMap['LB'] + ]) } } } diff --git a/main.nf b/main.nf index cf56d3a..a35f92c 100755 --- a/main.nf +++ b/main.nf @@ -7,9 +7,22 @@ include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modul main_process: "./" //Save logs in /process-log/run_validate_PipeVal ] ) -include { run_stats_SAMtools as run_stats_SAMtools_readgroup; run_stats_SAMtools as run_stats_SAMtools_library; run_stats_SAMtools as run_stats_SAMtools_sample } from './module/stats_samtools' addParams( +include { run_stats_SAMtools as run_stats_SAMtools_readgroup } from './module/stats_samtools' addParams( workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", - workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}" + workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", + stat_mode: "readgroup" + ) + +include { run_stats_SAMtools as run_stats_SAMtools_library } from './module/stats_samtools' addParams( + workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", + stat_mode: "library" + ) + +include { run_stats_SAMtools as run_stats_SAMtools_sample } from './module/stats_samtools' addParams( + workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", + stat_mode: "sample" ) include { run_CollectWgsMetrics_Picard } from './module/collectWgsMetrics_picard' addParams( @@ -148,12 +161,12 @@ workflow { ) if ('stats' in params.algorithm) { - if (params.readgroups_to_process.size() > 0) { + if (params.readgroups_to_process.size() > 1) { run_stats_SAMtools_readgroup( readgroups_to_process_ch ) } - if (params.libraries_to_process.size() > 0) { + if (params.libraries_to_process.size() > 1) { run_stats_SAMtools_library( libraries_to_process_ch ) diff --git a/module/fastqc.nf b/module/fastqc.nf index 2f7400e..b2e9f2c 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -11,7 +11,7 @@ process assess_ReadQuality_FastQC { container params.docker_image_fastqc publishDir path: "${params.workflow_output_dir}/output", - pattern: "${output_filename}", + pattern: "${sm_id}/${output_filename}_fastqc", mode: "copy", enabled: true ext log_dir_suffix: { "-${target}" } @@ -20,7 +20,7 @@ process assess_ReadQuality_FastQC { tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(unused), val(unused), val(ununsed) output: - path("${output_filename}") + path("${sm_id}/${output_filename}_fastqc") script: target = "${sm_id}-${rg_id}" @@ -31,11 +31,11 @@ process assess_ReadQuality_FastQC { """ set -euo pipefail - mkdir "${output_filename}" + mkdir -p ${sm_id} samtools view -F 0x900 -h ${rg_arg} ${path} | \ samtools fastq | \ fastqc \ - --outdir "./" \ + --outdir "${sm_id}" \ --format fastq \ --extract \ --delete \ diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index 0b76b10..0303d39 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -26,28 +26,23 @@ process run_stats_SAMtools { path ".command.*" script: - if (task.process == "run_stats_SAMtools_sample") { - output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", - params.dataset_id, - sm_id, - [:]) + if (params.stat_mode == "sample") { + filename_id = sm_id outdir = "." log_suffix = "${sm_id}" - } else if (task.process == "run_stats_SAMtools_library") { - output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", - params.dataset_id, - lib_id, - [:]) + } else if (params.stat_mode == "library") { + filename_id = lib_id outdir = sm_id log_suffix = "${sm_id}/${lib_id}" - } else if (task.process == "run_stats_SAMtools_readgroup") { - output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", - params.dataset_id, - rg_id, - [:]) + } else if (params.stat_mode == "readgroup") { + filename_id = rg_id outdir = "${sm_id}/${lib_id}" log_suffix = "${sm_id}/${lib_id}/${rg_id}" } + output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", + params.dataset_id, + filename_id, + [:]) rmdups = params.samtools_remove_duplicates ? "--remove-dups" : "" From b34451e15d51f1a35b463a0debd78ed32e69aa27 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 14 Jun 2024 12:19:08 -0700 Subject: [PATCH 18/45] add hg003 to NFTest --- nftest.yml | 90 +++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/nftest.yml b/nftest.yml index a60a746..a5e4ef6 100644 --- a/nftest.yml +++ b/nftest.yml @@ -6,6 +6,68 @@ global: clean_logs: true cases: + - name: hg003-all-tools + message: test all tools with downsampled HG003 subsetted to 6 readgroups and 3 libraries + nf_script: main.nf + nf_config: test/config/all-tools.config + params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml + skip: false + verbose: true + asserts: + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/SAMtools-*_GIAB_HG003_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.H9YY4ADXX.3F1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.H9YY4ADXX.3F1_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.HA660ADXX.3L1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.H9YY4ADXX.3F1/SAMtools-*_GIAB_HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA0L6ADXX.3F1/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA0L6ADXX.3F1/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/Picard-3.1.0/output/Picard-3.1.0_GIAB_HG003_wgs-metrics.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_GIAB_HG003_wgs-metrics.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/Qualimap-2.3/output/Qualimap-2.3_GIAB_HG003_stats/genome_results.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_GIAB_HG003_stats/genome_results.txt + script: test/assert_txt.sh - name: a_mini-all-tools message: test all tools nf_script: main.nf @@ -14,11 +76,11 @@ cases: skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt @@ -46,14 +108,14 @@ cases: skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1/S2-v1.1.5.n1_SMadjust_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5.n1/FastQC-*_TWGSAMIN_S2-v1.1.5.n1-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt @@ -90,11 +152,11 @@ cases: skip: true verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N/HG002.N_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/FastQC-*_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5/S2-v1.1.5_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt script: test/assert_txt.sh - name: a_mini-stats message: test samtools stats From d049c147671a2e67c3a17deceb56b5b0b0c1957f Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 28 May 2024 19:12:18 -0700 Subject: [PATCH 19/45] update samtools --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d05d86..6bdeeba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm ### Changed - Update SAMtools 1.18 to 1.20 -- Update NFTest for FastQC +- Update NFTest for FastQC and new test sample - Update repository/pipeline description - Update Nextflow configuration test workflows From 53632de42b88ddfb464580258936b5bea2b59e8e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 11:57:38 -0700 Subject: [PATCH 20/45] pull main - samtools update --- config/default.config | 1 + 1 file changed, 1 insertion(+) diff --git a/config/default.config b/config/default.config index cecc8e9..e235f16 100644 --- a/config/default.config +++ b/config/default.config @@ -18,6 +18,7 @@ params { pipeval_version = "4.0.0-rc.2" fastqc_version = "0.12.1_samtools-1.20" samtools_version = "1.20" + fastqc_version = "0.12.1" picard_version = "3.1.0" qualimap_version = "2.3" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" From 19b68a2bc31e376d2671e795ee257d15a7f8fa74 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 13 Jun 2024 14:54:30 -0700 Subject: [PATCH 21/45] use fastqc docker with samtools --- config/default.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/default.config b/config/default.config index e235f16..96b6f4d 100644 --- a/config/default.config +++ b/config/default.config @@ -18,7 +18,7 @@ params { pipeval_version = "4.0.0-rc.2" fastqc_version = "0.12.1_samtools-1.20" samtools_version = "1.20" - fastqc_version = "0.12.1" + fastqc_version = "0.12.1_samtools-1.20" picard_version = "3.1.0" qualimap_version = "2.3" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" From 0e2c3df146e5690a5bcebf4182428d18bcd872d9 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Fri, 14 Jun 2024 14:20:35 -0700 Subject: [PATCH 22/45] add fastqc resource allocations --- config/F16.config | 10 ++++++++++ config/F2.config | 10 ++++++++++ config/F32.config | 10 ++++++++++ config/F4.config | 10 ++++++++++ config/F72.config | 10 ++++++++++ config/F8.config | 10 ++++++++++ 6 files changed, 60 insertions(+) diff --git a/config/F16.config b/config/F16.config index 41f2766..1ac4969 100644 --- a/config/F16.config +++ b/config/F16.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB diff --git a/config/F2.config b/config/F2.config index 8b3f4bb..0558f22 100644 --- a/config/F2.config +++ b/config/F2.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1500.MB diff --git a/config/F32.config b/config/F32.config index 2ec47c3..d31a72f 100644 --- a/config/F32.config +++ b/config/F32.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB diff --git a/config/F4.config b/config/F4.config index e12d98b..3f01040 100644 --- a/config/F4.config +++ b/config/F4.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 3.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB diff --git a/config/F72.config b/config/F72.config index 0220198..32b68b9 100644 --- a/config/F72.config +++ b/config/F72.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB diff --git a/config/F8.config b/config/F8.config index 0d77405..8a2530e 100644 --- a/config/F8.config +++ b/config/F8.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 1 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB From f95d47ea0510b2e8846b916dcadb784bb7fefbe3 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Sun, 16 Jun 2024 16:19:06 -0700 Subject: [PATCH 23/45] add fastqc threading and adjust resources --- config/F16.config | 2 +- config/F32.config | 2 +- config/F4.config | 2 +- config/F72.config | 2 +- config/F8.config | 2 +- config/M64.config | 10 ++++++++++ module/fastqc.nf | 5 +++-- 7 files changed, 18 insertions(+), 7 deletions(-) diff --git a/config/F16.config b/config/F16.config index 1ac4969..005d44a 100644 --- a/config/F16.config +++ b/config/F16.config @@ -4,7 +4,7 @@ process { memory = 250.MB } withName: assess_ReadQuality_FastQC { - cpus = 1 + cpus = 2 memory = 1.GB retry_strategy { memory { diff --git a/config/F32.config b/config/F32.config index d31a72f..ee2dbfa 100644 --- a/config/F32.config +++ b/config/F32.config @@ -4,7 +4,7 @@ process { memory = 250.MB } withName: assess_ReadQuality_FastQC { - cpus = 1 + cpus = 2 memory = 1.GB retry_strategy { memory { diff --git a/config/F4.config b/config/F4.config index 3f01040..5b45d6d 100644 --- a/config/F4.config +++ b/config/F4.config @@ -4,7 +4,7 @@ process { memory = 250.MB } withName: assess_ReadQuality_FastQC { - cpus = 1 + cpus = 2 memory = 1.GB retry_strategy { memory { diff --git a/config/F72.config b/config/F72.config index 32b68b9..a4f4b54 100644 --- a/config/F72.config +++ b/config/F72.config @@ -4,7 +4,7 @@ process { memory = 250.MB } withName: assess_ReadQuality_FastQC { - cpus = 1 + cpus = 2 memory = 1.GB retry_strategy { memory { diff --git a/config/F8.config b/config/F8.config index 8a2530e..bae3dab 100644 --- a/config/F8.config +++ b/config/F8.config @@ -4,7 +4,7 @@ process { memory = 250.MB } withName: assess_ReadQuality_FastQC { - cpus = 1 + cpus = 2 memory = 1.GB retry_strategy { memory { diff --git a/config/M64.config b/config/M64.config index c66dd4c..168b0a1 100644 --- a/config/M64.config +++ b/config/M64.config @@ -3,6 +3,16 @@ process { cpus = 1 memory = 250.MB } + withName: assess_ReadQuality_FastQC { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } withName: run_stats_SAMtools_readgroup { cpus = 1 memory = 1.GB diff --git a/module/fastqc.nf b/module/fastqc.nf index b2e9f2c..04b19dd 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -32,9 +32,10 @@ process assess_ReadQuality_FastQC { """ set -euo pipefail mkdir -p ${sm_id} - samtools view -F 0x900 -h ${rg_arg} ${path} | \ - samtools fastq | \ + samtools view --threads ${task.cpus} --excl-flags 0x900 --with-header ${rg_arg} ${path} | \ + samtools fastq --threads ${task.cpus} | \ fastqc \ + --threads ${task.cpus} \ --outdir "${sm_id}" \ --format fastq \ --extract \ From da059b8eb29e344767057bc5d5eaf29c3d87f652 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 5 Jun 2024 11:57:38 -0700 Subject: [PATCH 24/45] add fastqc --- config/default.config | 1 - 1 file changed, 1 deletion(-) diff --git a/config/default.config b/config/default.config index 96b6f4d..cecc8e9 100644 --- a/config/default.config +++ b/config/default.config @@ -18,7 +18,6 @@ params { pipeval_version = "4.0.0-rc.2" fastqc_version = "0.12.1_samtools-1.20" samtools_version = "1.20" - fastqc_version = "0.12.1_samtools-1.20" picard_version = "3.1.0" qualimap_version = "2.3" docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}" From 8f92e3641a6a6847313c4cb03d2e8473bf46e64e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 17 Jun 2024 11:01:37 -0700 Subject: [PATCH 25/45] add fastqc to metadata.yaml --- metadata.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata.yaml b/metadata.yaml index e47a163..3be63f4 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -5,4 +5,4 @@ maintainers: "Boutros Lab Infrastructure Date: Sat, 15 Jun 2024 00:07:03 +0000 Subject: [PATCH 26/45] Bump the pipeline-submodules group with 2 updates Bumps the pipeline-submodules group with 2 updates: [external/pipeline-Nextflow-module](https://github.com/uclahs-cds/pipeline-Nextflow-module) and [external/pipeline-Nextflow-config](https://github.com/uclahs-cds/pipeline-Nextflow-config). Updates `external/pipeline-Nextflow-module` from `ef40c10` to `537da32` - [Commits](https://github.com/uclahs-cds/pipeline-Nextflow-module/compare/ef40c100005466b7d44f2fd60adb2c60ef451c4f...537da32a6c6160097ae4ab0b8aba4923ec969401) Updates `external/pipeline-Nextflow-config` from `be1eaf5` to `2127286` - [Release notes](https://github.com/uclahs-cds/pipeline-Nextflow-config/releases) - [Commits](https://github.com/uclahs-cds/pipeline-Nextflow-config/compare/be1eaf5e2831fb4d4c4e9bcef2a4b9618dfae8cd...2127286bddae814c267ecc85e9cc30a0f1ab00d0) --- updated-dependencies: - dependency-name: external/pipeline-Nextflow-module dependency-type: direct:production dependency-group: pipeline-submodules - dependency-name: external/pipeline-Nextflow-config dependency-type: direct:production dependency-group: pipeline-submodules ... Signed-off-by: dependabot[bot] --- external/pipeline-Nextflow-config | 2 +- external/pipeline-Nextflow-module | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/external/pipeline-Nextflow-config b/external/pipeline-Nextflow-config index be1eaf5..2127286 160000 --- a/external/pipeline-Nextflow-config +++ b/external/pipeline-Nextflow-config @@ -1 +1 @@ -Subproject commit be1eaf5e2831fb4d4c4e9bcef2a4b9618dfae8cd +Subproject commit 2127286bddae814c267ecc85e9cc30a0f1ab00d0 diff --git a/external/pipeline-Nextflow-module b/external/pipeline-Nextflow-module index ef40c10..537da32 160000 --- a/external/pipeline-Nextflow-module +++ b/external/pipeline-Nextflow-module @@ -1 +1 @@ -Subproject commit ef40c100005466b7d44f2fd60adb2c60ef451c4f +Subproject commit 537da32a6c6160097ae4ab0b8aba4923ec969401 From 198b40c676e857cbd446bd979f4c40088c26cdd9 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 20 Jun 2024 15:59:59 -0700 Subject: [PATCH 27/45] add final newline --- module/stats_samtools.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index 0303d39..a5244dc 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -50,4 +50,4 @@ process run_stats_SAMtools { set -euo pipefail samtools view -h ${rg_arg} ${path} | samtools stats ${rmdups} ${params.samtools_stats_additional_options} > ${output_filename}_stats.txt """ -} \ No newline at end of file +} From 91bf337fec2f7977908206333baa9572195c42f1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 16:58:04 -0700 Subject: [PATCH 28/45] update readme --- README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 1edd32f..8fb0295 100644 --- a/README.md +++ b/README.md @@ -80,8 +80,10 @@ input: #### SAMtools specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| remove_duplicates | boolean | no | Ignore reads marked as duplicate. default = `false` | -| samtools_stats_additional_options | string | no | Any additional options recognized by `samtools stats` | +| stats_max_rgs_to_process_separately | integer | no | If a sample has more than this number of readgroups, `SAMtools stats` will not run per readgroup analysis. Default = 20 | +| stats_max_libs_to_process_separately | integer | no | If a sample has more than this number of libraries, `SAMtools stats` will not run per library analysis. Default = 20 | +| stats_remove_duplicates | boolean | no | Ignore reads marked as duplicate. default = `false` | +| stats_additional_options | string | no | Any additional options recognized by `samtools stats` | #### Picard specific configuration | Field | Type | Required | Description | From 4e2b72471af8417835194d7a8bbf9ec41fe5989e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 16:58:26 -0700 Subject: [PATCH 29/45] update nftest.yml --- nftest.yml | 76 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 32 deletions(-) diff --git a/nftest.yml b/nftest.yml index a5e4ef6..b885e8c 100644 --- a/nftest.yml +++ b/nftest.yml @@ -14,53 +14,65 @@ cases: skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-sample/FastQC-*_GIAB_HG003_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/HG003/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/SAMtools-*_GIAB_HG003_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003_stats.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.H9YY4ADXX.3F1_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.H9YY4ADXX.3F1_stats.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1_stats.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/SAMtools-*_GIAB_HG003.HA660ADXX.3L1_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1_stats.txt + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.H9YY4ADXX.3F1/SAMtools-*_GIAB_HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-sample/SAMtools-*_GIAB_HG003_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA0L6ADXX.3F1/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-library/SAMtools-*_GIAB_HG003-HG003.H9YY4ADXX.3F1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA0L6ADXX.3F1/SAMtools-*_GIAB_HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-library/SAMtools-*_GIAB_HG003-HG003.HA0L6ADXX.3F1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-library/SAMtools-*_GIAB_HG003-HG003.HA660ADXX.3L1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/HG003/HG003.HA660ADXX.3L1/SAMtools-*_GIAB_HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_GIAB_HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_stats.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-readgroup/SAMtools-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_stats.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/NA24149/Picard-3.1.0/output/Picard-3.1.0_GIAB_HG003_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_GIAB_HG003_wgs-metrics.txt From 01b50bffbe81d993749821856150765b0b8f76fd Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 17:01:57 -0700 Subject: [PATCH 30/45] parameterize fastqc and add max gps to stats --- config/F16.config | 22 +++++++++++++- config/F2.config | 22 +++++++++++++- config/F32.config | 22 +++++++++++++- config/F4.config | 24 ++++++++++++++-- config/F72.config | 22 +++++++++++++- config/F8.config | 22 +++++++++++++- config/M64.config | 22 +++++++++++++- config/schema.yaml | 24 ++++++++++++++-- config/template.config | 6 ++-- main.nf | 62 +++++++++++++++++++++++++++++----------- module/fastqc.nf | 28 ++++++++++++------ module/stats_samtools.nf | 20 ++++++------- 12 files changed, 248 insertions(+), 48 deletions(-) diff --git a/config/F16.config b/config/F16.config index 005d44a..70d8990 100644 --- a/config/F16.config +++ b/config/F16.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 2 memory = 1.GB retry_strategy { diff --git a/config/F2.config b/config/F2.config index 0558f22..30a5751 100644 --- a/config/F2.config +++ b/config/F2.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 1 + memory = 1500.MB + retry_strategy { + memory { + strategy = 'add' + operand = 2000.MB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 1 memory = 1500.MB retry_strategy { diff --git a/config/F32.config b/config/F32.config index ee2dbfa..d8b87cf 100644 --- a/config/F32.config +++ b/config/F32.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 2 memory = 1.GB retry_strategy { diff --git a/config/F4.config b/config/F4.config index 5b45d6d..9b9373f 100644 --- a/config/F4.config +++ b/config/F4.config @@ -3,13 +3,33 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { cpus = 2 memory = 1.GB retry_strategy { memory { strategy = 'add' - operand = 3.GB + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB } } } diff --git a/config/F72.config b/config/F72.config index a4f4b54..0268f16 100644 --- a/config/F72.config +++ b/config/F72.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 2 memory = 1.GB retry_strategy { diff --git a/config/F8.config b/config/F8.config index bae3dab..c1db89d 100644 --- a/config/F8.config +++ b/config/F8.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 2 memory = 1.GB retry_strategy { diff --git a/config/M64.config b/config/M64.config index 168b0a1..84f49bb 100644 --- a/config/M64.config +++ b/config/M64.config @@ -3,7 +3,27 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC { + withName: assess_ReadQuality_FastQC_readgroup { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_library { + cpus = 2 + memory = 1.GB + retry_strategy { + memory { + strategy = 'add' + operand = 4.GB + } + } + } + withName: assess_ReadQuality_FastQC_sample { cpus = 2 memory = 1.GB retry_strategy { diff --git a/config/schema.yaml b/config/schema.yaml index d455dd5..46b7acb 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -40,18 +40,38 @@ save_intermediate_files: required: false default: false help: 'The option to save the intermediate files' +fastqc_level: + type: 'List' + required: false + help: 'FastQC level of analysis' + choices: + - readgroup + - library + - sample + default: + - readgroup fastqc_additional_options: type: 'String' required: false allow_empty: true default: '' help: 'Additional arguments for FastQC command' -samtools_remove_duplicates: +stats_max_rgs_to_process_separately: + type: 'Integer' + required: false + default: 20 + help: 'Maximum number of read groups to process separately' +stats_max_libs_to_process_separately: + type: 'Integer' + required: false + default: 20 + help: 'Maximum number of libraries to process separately' +stats_remove_duplicates: type: 'Bool' required: false default: false help: 'SAMtools stats option to remove duplicates' -samtools_stats_additional_options: +stats_additional_options: type: 'String' required: false allow_empty: true diff --git a/config/template.config b/config/template.config index 39b4196..c537361 100644 --- a/config/template.config +++ b/config/template.config @@ -15,8 +15,9 @@ params { save_intermediate_files = true // SAMtools stats options - samtools_remove_duplicates = false - samtools_stats_additional_options = '' + stats_max_rgs_to_process_separately = 20 + stats_remove_duplicates = false + stats_additional_options = '' // Picard CollectWgsMetrics options cwm_coverage_cap = 1000 @@ -30,6 +31,7 @@ params { bamqc_additional_options = '' // FastQC options + fastqc_level = ['readgroup'] // 'readgroup', 'library', 'sample' fastqc_additional_options = '' // Base resource allocation updater diff --git a/main.nf b/main.nf index a35f92c..4d4f00f 100755 --- a/main.nf +++ b/main.nf @@ -25,6 +25,24 @@ include { run_stats_SAMtools as run_stats_SAMtools_sample } from './module/stats stat_mode: "sample" ) +include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_readgroup } from './module/fastqc' addParams( + workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", + stat_mode: "readgroup" + ) + +include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_library } from './module/fastqc' addParams( + workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", + stat_mode: "library" + ) + +include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_sample } from './module/fastqc' addParams( + workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", + workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", + stat_mode: "sample" + ) + include { run_CollectWgsMetrics_Picard } from './module/collectWgsMetrics_picard' addParams( workflow_output_dir: "${params.output_dir_base}/Picard-${params.picard_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/Picard-${params.picard_version}" @@ -35,10 +53,6 @@ include { run_bamqc_Qualimap } from './module/bamqc_qualimap' addParams( workflow_log_output_dir: "${params.log_output_dir}/process-log/Qualimap-${params.qualimap_version}" ) -include { assess_ReadQuality_FastQC } from './module/fastqc' addParams( - workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", - workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}" - ) include { indexFile } from './external/pipeline-Nextflow-module/modules/common/indexFile/main.nf' @@ -85,8 +99,10 @@ log.info """\ - samtools stats options: samtools_version: ${params.samtools_version} - samtools_remove_duplicates: ${params.samtools_remove_duplicates} - samtools_stats_additional_options: ${params.samtools_stats_additional_options} + stats_max_rgs_to_process_separately: ${params.stats_max_rgs_to_process_separately} + stats_max_libs_to_process_separately: ${params.stats_max_libs_to_process_separately} + stats_remove_duplicates: ${params.stats_remove_duplicates} + stats_additional_options: ${params.stats_additional_options} - picard CollectWgsMetrics options: picard_version: ${params.picard_version} @@ -154,19 +170,21 @@ if ('collectwgsmetrics' in params.algorithm) { workflow { // Input file validation - run_validate_PipeVal(files_to_validate_ch) - run_validate_PipeVal.out.validation_result.collectFile( - name: 'input_validation.txt', newLine: true, - storeDir: "${params.output_dir_base}/validation" - ) +// run_validate_PipeVal(files_to_validate_ch) +// run_validate_PipeVal.out.validation_result.collectFile( +// name: 'input_validation.txt', newLine: true, +// storeDir: "${params.output_dir_base}/validation" +// ) if ('stats' in params.algorithm) { - if (params.readgroups_to_process.size() > 1) { + if (params.readgroups_to_process.size() > 1 \ + && params.readgroups_to_process.size() <= params.stats_max_rgs_to_process_separately) { run_stats_SAMtools_readgroup( readgroups_to_process_ch ) } - if (params.libraries_to_process.size() > 1) { + if (params.libraries_to_process.size() > 1 \ + && params.libraries_to_process.size() <= params.stats_max_libs_to_process_separately) { run_stats_SAMtools_library( libraries_to_process_ch ) @@ -176,9 +194,21 @@ workflow { ) } if ('fastqc' in params.algorithm) { - assess_ReadQuality_FastQC( - readgroups_to_process_ch - ) + if ('readgroup' in params.fastqc_level) { + assess_ReadQuality_FastQC_readgroup( + readgroups_to_process_ch + ) + } + if ('library' in params.fastqc_level) { + assess_ReadQuality_FastQC_library( + libraries_to_process_ch + ) + } + if ('sample' in params.fastqc_level) { + assess_ReadQuality_FastQC_sample( + samples_to_process_ch + ) + } } if ('collectwgsmetrics' in params.algorithm) { run_CollectWgsMetrics_Picard( diff --git a/module/fastqc.nf b/module/fastqc.nf index 04b19dd..c1d6746 100644 --- a/module/fastqc.nf +++ b/module/fastqc.nf @@ -11,32 +11,44 @@ process assess_ReadQuality_FastQC { container params.docker_image_fastqc publishDir path: "${params.workflow_output_dir}/output", - pattern: "${sm_id}/${output_filename}_fastqc", + pattern: "${outdir}/${output_filename}_fastqc", mode: "copy", enabled: true - ext log_dir_suffix: { "-${target}" } + ext log_dir_suffix: { "-${filename_id}" } input: - tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(unused), val(unused), val(ununsed) + tuple path(path), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) output: - path("${sm_id}/${output_filename}_fastqc") + path "${outdir}/${output_filename}_fastqc" + path ".command.*" script: - target = "${sm_id}-${rg_id}" + + if (params.stat_mode == "sample") { + filename_id = sm_id + outdir = "by-sample" + } else if (params.stat_mode == "library") { + filename_id = sm_id + "-" + lib_id + outdir = "by-library" + } else if (params.stat_mode == "readgroup") { + filename_id = sm_id + "-" + lib_id + "-" + rg_id + outdir = "by-readgroup" + } + output_filename = generate_standard_filename("FastQC-${params.fastqc_version}", params.dataset_id, - target, + filename_id, [:]) """ set -euo pipefail - mkdir -p ${sm_id} + mkdir -p ${outdir} samtools view --threads ${task.cpus} --excl-flags 0x900 --with-header ${rg_arg} ${path} | \ samtools fastq --threads ${task.cpus} | \ fastqc \ --threads ${task.cpus} \ - --outdir "${sm_id}" \ + --outdir "${outdir}" \ --format fastq \ --extract \ --delete \ diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index a5244dc..c8fb860 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -15,8 +15,7 @@ process run_stats_SAMtools { mode: "copy", enabled: true, saveAs: { "${outdir}/${file(it).getName()}" } - - ext log_dir_suffix: { "-${log_suffix}" } + ext log_dir_suffix: { "-${outdir}/${filename_id}" } input: tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(unused), val(unused) @@ -28,26 +27,23 @@ process run_stats_SAMtools { script: if (params.stat_mode == "sample") { filename_id = sm_id - outdir = "." - log_suffix = "${sm_id}" + outdir = "by-sample" } else if (params.stat_mode == "library") { - filename_id = lib_id - outdir = sm_id - log_suffix = "${sm_id}/${lib_id}" + filename_id = sm_id + "-" + lib_id + outdir = "by-library" } else if (params.stat_mode == "readgroup") { - filename_id = rg_id - outdir = "${sm_id}/${lib_id}" - log_suffix = "${sm_id}/${lib_id}/${rg_id}" + filename_id = sm_id + "-" + lib_id + "-" + rg_id + outdir = "by-readgroup" } output_filename = generate_standard_filename("SAMtools-${params.samtools_version}", params.dataset_id, filename_id, [:]) - rmdups = params.samtools_remove_duplicates ? "--remove-dups" : "" + rmdups = params.stats_remove_duplicates ? "--remove-dups" : "" """ set -euo pipefail - samtools view -h ${rg_arg} ${path} | samtools stats ${rmdups} ${params.samtools_stats_additional_options} > ${output_filename}_stats.txt + samtools view -h ${rg_arg} ${path} | samtools stats ${rmdups} ${params.stats_additional_options} > ${output_filename}_stats.txt """ } From e61b1ac12d38fb4c15a1f00958f772223f9664cf Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 17:02:36 -0700 Subject: [PATCH 31/45] sanitize library ID --- config/methods.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/methods.config b/config/methods.config index ec2fe38..2686f20 100644 --- a/config/methods.config +++ b/config/methods.config @@ -39,13 +39,14 @@ methods { ]) } bam_header['read_group'].each { rgMap -> + def lib_id = methods.sanitize_uclahs_cds_id(rgMap['LB']) def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) params.readgroups_to_process.add([ 'path': bam_path, 'sm_id': new_sm_tag, 'orig_rg_id': rgMap['ID'], 'rg_id': rg_id, - 'lib_id': rgMap['LB'] + 'lib_id': lib_id ]) } } From e6d0200cb90994572faf9c102c36d97073b4f9e6 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 17:03:05 -0700 Subject: [PATCH 32/45] update test configs --- test/config/all-tools.config | 6 ++++-- test/config/fastqc.config | 8 ++++++-- test/config/stats.config | 5 +++-- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/test/config/all-tools.config b/test/config/all-tools.config index 1767142..026f32e 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -14,8 +14,9 @@ params { save_intermediate_files = true // SAMtools stats options - samtools_remove_duplicates = false - samtools_stats_additional_options = '' + stats_max_rgs_to_process_separately = 20 + stats_remove_duplicates = false + stats_additional_options = '' // Picard CollectWgsMetrics options cwm_coverage_cap = 1000 @@ -29,6 +30,7 @@ params { bamqc_additional_options = '' // FastQC options + fastqc_level = ['readgroup', 'library', 'sample'] // 'readgroup', 'library', 'sample' fastqc_additional_options = '' // Base resource allocation updater diff --git a/test/config/fastqc.config b/test/config/fastqc.config index dce8944..286ba1e 100644 --- a/test/config/fastqc.config +++ b/test/config/fastqc.config @@ -14,8 +14,12 @@ params { save_intermediate_files = true // SAMtools stats options - samtools_remove_duplicates = false - samtools_stats_additional_options = '' + stats_remove_duplicates = false + stats_additional_options = '' + + // FastQC options + fastqc_level = ['readgroup', 'library', 'sample'] // 'readgroup', 'library', 'sample' + fastqc_additional_options = '' // Base resource allocation updater // See README for adding parameters to update the base resource allocations diff --git a/test/config/stats.config b/test/config/stats.config index f53ec65..a8ee81d 100644 --- a/test/config/stats.config +++ b/test/config/stats.config @@ -14,8 +14,9 @@ params { save_intermediate_files = true // SAMtools stats options - samtools_remove_duplicates = false - samtools_stats_additional_options = '' + stats_max_rgs_to_process_separately = 20 + stats_remove_duplicates = false + stats_additional_options = '' // Base resource allocation updater // See README for adding parameters to update the base resource allocations From 91f47d339427c0c445e602f2897a75e4105427d1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Mon, 24 Jun 2024 17:15:33 -0700 Subject: [PATCH 33/45] change process input variable names --- module/bamqc_qualimap.nf | 2 +- module/collectWgsMetrics_picard.nf | 2 +- module/stats_samtools.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/module/bamqc_qualimap.nf b/module/bamqc_qualimap.nf index 2c4a4b4..1c447e9 100644 --- a/module/bamqc_qualimap.nf +++ b/module/bamqc_qualimap.nf @@ -16,7 +16,7 @@ process run_bamqc_Qualimap { ext log_dir_suffix: { "-${sm_id}" } input: - tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(unused) + tuple path(path), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) output: path "*_stats", emit: stats diff --git a/module/collectWgsMetrics_picard.nf b/module/collectWgsMetrics_picard.nf index 9890313..f228890 100644 --- a/module/collectWgsMetrics_picard.nf +++ b/module/collectWgsMetrics_picard.nf @@ -17,7 +17,7 @@ process run_CollectWgsMetrics_Picard { ext log_dir_suffix: { "-${sm_id}" } input: - tuple path(path), val(unused), val(sm_id), val(unused), val(unused), val(unused), val(unused), val(read_length) + tuple path(path), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) path reference path reference_index diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index c8fb860..4ba66df 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -18,7 +18,7 @@ process run_stats_SAMtools { ext log_dir_suffix: { "-${outdir}/${filename_id}" } input: - tuple path(path), val(unused), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(unused), val(unused) + tuple path(path), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) output: path "*stats.txt" From a2f04897462cade757a51b1ee1c9e89498e297a7 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Tue, 25 Jun 2024 10:43:11 -0700 Subject: [PATCH 34/45] add slurm logs and extra test files to .gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 27d8114..2b3f79d 100644 --- a/.gitignore +++ b/.gitignore @@ -80,3 +80,8 @@ work/ *.gz *.tar *.zip + +# Other +test/* +test/*/* +slurm-*.out From fae512d5e397c6f9af138f13f9cc35c624061600 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:10:43 -0700 Subject: [PATCH 35/45] update readme --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8fb0295..cef525b 100644 --- a/README.md +++ b/README.md @@ -80,11 +80,17 @@ input: #### SAMtools specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| stats_max_rgs_to_process_separately | integer | no | If a sample has more than this number of readgroups, `SAMtools stats` will not run per readgroup analysis. Default = 20 | -| stats_max_libs_to_process_separately | integer | no | If a sample has more than this number of libraries, `SAMtools stats` will not run per library analysis. Default = 20 | +| stats_max_rgs_per_sample | integer | no | If a sample has more than this number of readgroups, `SAMtools stats` will not run per readgroup analysis. Default = 20 | +| stats_max_libs_per_sample | integer | no | If a sample has more than this number of libraries, `SAMtools stats` will not run per library analysis. Default = 20 | | stats_remove_duplicates | boolean | no | Ignore reads marked as duplicate. default = `false` | | stats_additional_options | string | no | Any additional options recognized by `samtools stats` | +#### FastQC specific configuration +| Field | Type | Required | Description | +| ----- | ---- | ------------ | ------------------------ | +| fastqc_level | string | yes | 'readgroup', 'library' or 'sample' | +| fastqc_additional_options | string | no | Any additional options recognized by `FastQC` | + #### Picard specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | From ecf44dde6a16b497e1680caab6fdb91b0beed90e Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:13:53 -0700 Subject: [PATCH 36/45] update comments --- config/methods.config | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/config/methods.config b/config/methods.config index 2686f20..65d99c7 100644 --- a/config/methods.config +++ b/config/methods.config @@ -28,6 +28,8 @@ methods { 'read_length': sampleMap.getOrDefault('read_length', null), 'sample_type': k ]) + // collect library level information only for samples with more than one library and + // less than max_libraries_per_sample bam_header['read_group'].collect{ it['LB'] }.unique().each { lib -> def lib_id = methods.sanitize_uclahs_cds_id(lib) def rgs = bam_header['read_group'].findAll{ it['LB'] == lib }.collect{ it['ID'] } @@ -38,6 +40,8 @@ methods { 'lib_id': lib_id ]) } + // collect read group level information only for samples with more than one read group + // and less than max_readgroups_per_sample bam_header['read_group'].each { rgMap -> def lib_id = methods.sanitize_uclahs_cds_id(rgMap['LB']) def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) From 206c7e96fdb3c4b2e65ca8d74ce2956155329c62 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:18:52 -0700 Subject: [PATCH 37/45] adjust run level triggers --- config/schema.yaml | 10 +++--- config/template.config | 4 +-- main.nf | 77 +++++++++++++++++++++++++++--------------- 3 files changed, 57 insertions(+), 34 deletions(-) diff --git a/config/schema.yaml b/config/schema.yaml index 46b7acb..f7cf09b 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -41,27 +41,27 @@ save_intermediate_files: default: false help: 'The option to save the intermediate files' fastqc_level: - type: 'List' - required: false + type: 'String' + required: true help: 'FastQC level of analysis' choices: - readgroup - library - sample default: - - readgroup + - sample fastqc_additional_options: type: 'String' required: false allow_empty: true default: '' help: 'Additional arguments for FastQC command' -stats_max_rgs_to_process_separately: +stats_max_rgs_per_sample: type: 'Integer' required: false default: 20 help: 'Maximum number of read groups to process separately' -stats_max_libs_to_process_separately: +stats_max_libs_per_sample: type: 'Integer' required: false default: 20 diff --git a/config/template.config b/config/template.config index c537361..ff3bb64 100644 --- a/config/template.config +++ b/config/template.config @@ -15,7 +15,7 @@ params { save_intermediate_files = true // SAMtools stats options - stats_max_rgs_to_process_separately = 20 + stats_max_rgs_per_sample = 20 stats_remove_duplicates = false stats_additional_options = '' @@ -31,7 +31,7 @@ params { bamqc_additional_options = '' // FastQC options - fastqc_level = ['readgroup'] // 'readgroup', 'library', 'sample' + fastqc_level = 'sample' // 'readgroup', 'library' or 'sample' fastqc_additional_options = '' // Base resource allocation updater diff --git a/main.nf b/main.nf index 4d4f00f..c42b6c9 100755 --- a/main.nf +++ b/main.nf @@ -99,11 +99,16 @@ log.info """\ - samtools stats options: samtools_version: ${params.samtools_version} - stats_max_rgs_to_process_separately: ${params.stats_max_rgs_to_process_separately} - stats_max_libs_to_process_separately: ${params.stats_max_libs_to_process_separately} + stats_max_rgs_per_sample: ${params.stats_max_rgs_per_sample} + stats_max_libs_per_sample: ${params.stats_max_libs_per_sample} stats_remove_duplicates: ${params.stats_remove_duplicates} stats_additional_options: ${params.stats_additional_options} + - FastQC options: + fastqc_version: ${params.fastqc_version} + fastqc_level: ${params.fastqc_level} + fastqc_additional_options: ${params.fastqc_additional_options} + - picard CollectWgsMetrics options: picard_version: ${params.picard_version} cwm_minimum_coverage_cap: ${params.cwm_coverage_cap} @@ -116,10 +121,6 @@ log.info """\ qualimap_version: ${params.qualimap_version} bamqc_outformat: ${params.bamqc_outformat} bamqc_additional_options: ${params.bamqc_additional_options} - - - FastQC options: - fastqc_version: ${params.fastqc_version} - fastqc_additional_options: ${params.fastqc_additional_options} """ Channel @@ -146,6 +147,35 @@ Channel } .set { readgroups_to_process_ch } +// For stats, avoid re-processing samples with only one library or readgroup, +// and samples with more than the specified maximum number of libraries or readgroups +def stats_libraries = params.libraries_to_process.findAll { lib -> + def sample_libs = params.libraries_to_process.findAll { it.sm_id == lib.sm_id } + return sample_libs.size() > 1 && sample_libs.size() <= params.stats_max_libs_per_sample + } + +Channel + .fromList(stats_libraries) + .map { lib -> + def rg_arg = lib.rgs.collect { "-r ${it}" }.join(' ') + return tuple(lib.path, null, lib.sm_id, rg_arg, null, lib.lib_id, null, null) + } + .set { stats_libraries_ch } + +def stats_readgroups = params.readgroups_to_process.findAll { rg -> + def sample_rgs = params.readgroups_to_process.findAll { it.sm_id == rg.sm_id } + return sample_rgs.size() > 1 && sample_rgs.size() <= params.stats_max_rgs_per_sample + } + +Channel + .fromList(stats_readgroups) + .map { rg -> + def rg_arg = "-r ${rg.orig_rg_id}" + return tuple(rg.path, null, rg.sm_id, rg_arg, rg.rg_id, rg.lib_id, null, null) + } + .set { stats_readgroups_ch } + +// Set up file validation channel Channel .fromList(params.samples_to_process) .map{ it -> [it['path'], indexFile(it['path'])] } @@ -169,42 +199,35 @@ if ('collectwgsmetrics' in params.algorithm) { } workflow { - // Input file validation -// run_validate_PipeVal(files_to_validate_ch) -// run_validate_PipeVal.out.validation_result.collectFile( -// name: 'input_validation.txt', newLine: true, -// storeDir: "${params.output_dir_base}/validation" -// ) + run_validate_PipeVal(files_to_validate_ch) + run_validate_PipeVal.out.validation_result.collectFile( + name: 'input_validation.txt', newLine: true, + storeDir: "${params.output_dir_base}/validation" + ) if ('stats' in params.algorithm) { - if (params.readgroups_to_process.size() > 1 \ - && params.readgroups_to_process.size() <= params.stats_max_rgs_to_process_separately) { - run_stats_SAMtools_readgroup( - readgroups_to_process_ch - ) - } - if (params.libraries_to_process.size() > 1 \ - && params.libraries_to_process.size() <= params.stats_max_libs_to_process_separately) { - run_stats_SAMtools_library( - libraries_to_process_ch - ) - } + run_stats_SAMtools_readgroup( + stats_readgroups_ch + ) + run_stats_SAMtools_library( + stats_libraries_ch + ) run_stats_SAMtools_sample( samples_to_process_ch ) } if ('fastqc' in params.algorithm) { - if ('readgroup' in params.fastqc_level) { + if (params.fastqc_level == 'readgroup') { assess_ReadQuality_FastQC_readgroup( readgroups_to_process_ch ) } - if ('library' in params.fastqc_level) { + else if (params.fastqc_level == 'library') { assess_ReadQuality_FastQC_library( libraries_to_process_ch ) } - if ('sample' in params.fastqc_level) { + else if (params.fastqc_level == 'sample') { assess_ReadQuality_FastQC_sample( samples_to_process_ch ) From 8587520f5f882a3b48f73dbf77196acceb419c5b Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:19:15 -0700 Subject: [PATCH 38/45] adjust log filename --- module/stats_samtools.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/module/stats_samtools.nf b/module/stats_samtools.nf index 4ba66df..c747f59 100644 --- a/module/stats_samtools.nf +++ b/module/stats_samtools.nf @@ -4,7 +4,6 @@ * */ - include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' process run_stats_SAMtools { @@ -15,7 +14,7 @@ process run_stats_SAMtools { mode: "copy", enabled: true, saveAs: { "${outdir}/${file(it).getName()}" } - ext log_dir_suffix: { "-${outdir}/${filename_id}" } + ext log_dir_suffix: { "-${filename_id}" } input: tuple path(path), val(orig_id), val(sm_id), val(rg_arg), val(rg_id), val(lib_id), val(sm_type), val(read_length) From 137874fb96c3c2d5cc1414b98433e64eef1cf880 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:20:06 -0700 Subject: [PATCH 39/45] update nftests --- nftest.yml | 119 +++++++++++++++++------------------ test/config/all-tools.config | 4 +- test/config/fastqc.config | 29 --------- test/config/stats.config | 2 +- 4 files changed, 62 insertions(+), 92 deletions(-) delete mode 100644 test/config/fastqc.config diff --git a/nftest.yml b/nftest.yml index b885e8c..0e9b800 100644 --- a/nftest.yml +++ b/nftest.yml @@ -7,7 +7,7 @@ global: cases: - name: hg003-all-tools - message: test all tools with downsampled HG003 subsetted to 6 readgroups and 3 libraries + message: test all tools with downsampled HG003 subsetted to 6 readgroups and 3 libraries; sample level fastqc nf_script: main.nf nf_config: test/config/all-tools.config params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml @@ -17,33 +17,6 @@ cases: - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-sample/FastQC-*_GIAB_HG003_fastqc/fastqc_data.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-library/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt - script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt - script: test/assert_txt.sh - actual: generate-SQC-BAM-*/NA24149/SAMtools-*/output/by-sample/SAMtools-*_GIAB_HG003_stats.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_GIAB_HG003_stats.txt script: test/assert_txt.sh @@ -81,24 +54,24 @@ cases: expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_GIAB_HG003_stats/genome_results.txt script: test/assert_txt.sh - name: a_mini-all-tools - message: test all tools + message: test all tools with a_mini, single readgroup normal and tumor nf_script: main.nf nf_config: test/config/all-tools.config params_file: test/yaml/a_mini.yaml skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_HG002.N_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_S2-v1.1.5_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5_stats.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_HG002.N_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_HG002.N_wgs-metrics.txt @@ -113,30 +86,30 @@ cases: expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt method: md5 - name: a_mini-all-tools-multiple-samples - message: test all tools with 1 normal and 2 tumor samples + message: test all tools with 1 normal and 2 tumor samples (single readgroups) nf_script: main.nf nf_config: test/config/all-tools.config params_file: test/yaml/a_mini-multiple-samples.yaml skip: false verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5.n1/FastQC-*_TWGSAMIN_S2-v1.1.5.n1-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5.n1_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_HG002.N_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_S2-v1.1.5_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_S2-v1.1.5.n1_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_S2-v1.1.5.n1_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5.n1_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5.n1_stats.txt script: test/assert_txt.sh - actual: generate-SQC-BAM-*/TWGSAMIN000001/Picard-*/output/Picard-*_TWGSAMIN_HG002.N_wgs-metrics.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Picard-3.1.0_TWGSAMIN_HG002.N_wgs-metrics.txt @@ -156,19 +129,45 @@ cases: - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5.n1_stats/genome_results.txt method: md5 + - name: hg003-fastqc + message: test fastqc with downsampled HG003 subsetted to 6 readgroups and 3 libraries, readgroup level + nf_script: main.nf + nf_config: test/config/fastqc-readgroup.config + params_file: test/yaml/HG003_0.05x-selected-readgroups.yaml + skip: true + verbose: true + asserts: + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.H9YY4ADXX.3F1-HG003.H9YY4ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA0L6ADXX.3F1-HG003.HA0L6ADXX.3F1.L002-001.SeqL002-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L001-001.SeqL001-001_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-002.SeqL002-002_fastqc/fastqc_data.txt + script: test/assert_txt.sh + - actual: generate-SQC-BAM-*/NA24149/FastQC-*/output/by-readgroup/FastQC-*_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_GIAB_HG003-HG003.HA660ADXX.3L1-HG003.HA660ADXX.3L1.L002-005.SeqL002-005_fastqc/fastqc_data.txt + script: test/assert_txt.sh - name: a_mini-fastqc - message: test fastqc + message: test fastqc with a_mini normal and tumor, sample level nf_script: main.nf - nf_config: test/config/fastqc.config + nf_config: test/config/fastqc-sample.config params_file: test/yaml/a_mini.yaml skip: true verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/HG002.N/FastQC-*_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N-HG002.N.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_HG002.N_fastqc/fastqc_data.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/S2-v1.1.5/FastQC-*_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5-S2-v1.1.5.seq1.Seq1_fastqc/fastqc_data.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/FastQC-*/output/by-sample/FastQC-*_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/FastQC-0.12.1-samtools-1.20_TWGSAMIN_S2-v1.1.5_fastqc/fastqc_data.txt script: test/assert_txt.sh - name: a_mini-stats message: test samtools stats @@ -178,11 +177,11 @@ cases: skip: true verbose: true asserts: - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_HG002.N_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_HG002.N_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_HG002.N_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_HG002.N_stats.txt script: test/assert_txt.sh - - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt - expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.18_TWGSAMIN_S2-v1.1.5_stats.txt + - actual: generate-SQC-BAM-*/TWGSAMIN000001/SAMtools-*/output/by-sample/SAMtools-*_TWGSAMIN_S2-v1.1.5_stats.txt + expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/SAMtools-1.20_TWGSAMIN_S2-v1.1.5_stats.txt script: test/assert_txt.sh - name: a_mini-collectwgsmetrics message: test collectwgsmetrics diff --git a/test/config/all-tools.config b/test/config/all-tools.config index 026f32e..d1bcddc 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -14,7 +14,7 @@ params { save_intermediate_files = true // SAMtools stats options - stats_max_rgs_to_process_separately = 20 + stats_max_rgs_per_sample = 20 stats_remove_duplicates = false stats_additional_options = '' @@ -30,7 +30,7 @@ params { bamqc_additional_options = '' // FastQC options - fastqc_level = ['readgroup', 'library', 'sample'] // 'readgroup', 'library', 'sample' + fastqc_level = 'sample' // 'readgroup', 'library' or 'sample' fastqc_additional_options = '' // Base resource allocation updater diff --git a/test/config/fastqc.config b/test/config/fastqc.config deleted file mode 100644 index 286ba1e..0000000 --- a/test/config/fastqc.config +++ /dev/null @@ -1,29 +0,0 @@ -// EXECUTION SETTINGS AND GLOBAL DEFAULTS - -// External config files import. DO NOT MODIFY THESE LINES! -includeConfig "${projectDir}/config/default.config" -includeConfig "${projectDir}/config/methods.config" -includeConfig "${projectDir}/nextflow.config" - - -// Inputs/parameters of the pipeline -params { - algorithm = ['fastqc'] - reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' - blcds_registered_dataset = false // if you want the output to be registered - save_intermediate_files = true - - // SAMtools stats options - stats_remove_duplicates = false - stats_additional_options = '' - - // FastQC options - fastqc_level = ['readgroup', 'library', 'sample'] // 'readgroup', 'library', 'sample' - fastqc_additional_options = '' - - // Base resource allocation updater - // See README for adding parameters to update the base resource allocations -} - -// Setup the pipeline config. DO NOT REMOVE THIS LINE! -methods.setup() diff --git a/test/config/stats.config b/test/config/stats.config index a8ee81d..644f6ca 100644 --- a/test/config/stats.config +++ b/test/config/stats.config @@ -14,7 +14,7 @@ params { save_intermediate_files = true // SAMtools stats options - stats_max_rgs_to_process_separately = 20 + stats_max_rgs_per_sample = 20 stats_remove_duplicates = false stats_additional_options = '' From 6ad240e11482323c3e62ca6b45523fc31e53ae76 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Wed, 26 Jun 2024 14:38:27 -0700 Subject: [PATCH 40/45] fix test name --- nftest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nftest.yml b/nftest.yml index 0e9b800..054cb2c 100644 --- a/nftest.yml +++ b/nftest.yml @@ -85,7 +85,7 @@ cases: - actual: generate-SQC-BAM-*/TWGSAMIN000001/Qualimap-*/output/Qualimap-*_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt expect: /hot/software/pipeline/pipeline-generate-SQC-BAM/Nextflow/development/test-output/Qualimap-2.3_TWGSAMIN_S2-v1.1.5_stats/genome_results.txt method: md5 - - name: a_mini-all-tools-multiple-samples + - name: a_mini-multiple-samples-all-tools message: test all tools with 1 normal and 2 tumor samples (single readgroups) nf_script: main.nf nf_config: test/config/all-tools.config From 86cc4b23ede7b08d403a90ec7158a69d80f05e51 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 27 Jun 2024 09:14:31 -0700 Subject: [PATCH 41/45] remove out of date comments --- config/methods.config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/config/methods.config b/config/methods.config index 65d99c7..2686f20 100644 --- a/config/methods.config +++ b/config/methods.config @@ -28,8 +28,6 @@ methods { 'read_length': sampleMap.getOrDefault('read_length', null), 'sample_type': k ]) - // collect library level information only for samples with more than one library and - // less than max_libraries_per_sample bam_header['read_group'].collect{ it['LB'] }.unique().each { lib -> def lib_id = methods.sanitize_uclahs_cds_id(lib) def rgs = bam_header['read_group'].findAll{ it['LB'] == lib }.collect{ it['ID'] } @@ -40,8 +38,6 @@ methods { 'lib_id': lib_id ]) } - // collect read group level information only for samples with more than one read group - // and less than max_readgroups_per_sample bam_header['read_group'].each { rgMap -> def lib_id = methods.sanitize_uclahs_cds_id(rgMap['LB']) def rg_id = methods.sanitize_uclahs_cds_id(rgMap['ID']) From 8fd71195c99d8734367424229f708e15b11be9ba Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 27 Jun 2024 15:13:51 -0700 Subject: [PATCH 42/45] change process names --- README.md | 8 ++++---- config/F16.config | 12 ++++++------ config/F2.config | 12 ++++++------ config/F32.config | 12 ++++++------ config/F4.config | 12 ++++++------ config/F72.config | 12 ++++++------ config/F8.config | 12 ++++++------ config/M64.config | 12 ++++++------ main.nf | 24 ++++++++++++------------ 9 files changed, 58 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index cef525b..4dbe016 100644 --- a/README.md +++ b/README.md @@ -132,23 +132,23 @@ base_resource_update { ] } ``` -- To double memory for `run_CollectWgsMetrics_Picard` and triple memory for `run_stats_SAMtools` and `run_bamqc_Qualimap`: +- To double memory for `run_CollectWgsMetrics_Picard` and triple memory for `run_statsSamples_SAMtools` and `run_bamqc_Qualimap`: ```Nextflow base_resource_update { memory = [ ['run_CollectWgsMetrics_Picard', 2], - [['run_stats_SAMtools', 'run_bamqc_Qualimap'], 3] + [['run_statsSamples_SAMtools', 'run_bamqc_Qualimap'], 3] ] } ``` -- To double CPUs and memory for `run_CollectWgsMetrics_Picard` and double memory for `run_stats_SAMtools`: +- To double CPUs and memory for `run_CollectWgsMetrics_Picard` and double memory for `run_statsSamples_SAMtools`: ```Nextflow base_resource_update { cpus = [ ['run_CollectWgsMetrics_Picard', 2] ] memory = [ - [['run_CollectWgsMetrics_Picard', 'run_stats_SAMtools'], 2] + [['run_CollectWgsMetrics_Picard', 'run_statsSamples_SAMtools'], 2] ] } ``` diff --git a/config/F16.config b/config/F16.config index 70d8990..ce2e3db 100644 --- a/config/F16.config +++ b/config/F16.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/config/F2.config b/config/F2.config index 30a5751..5bba263 100644 --- a/config/F2.config +++ b/config/F2.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 1 memory = 1500.MB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 1 memory = 1500.MB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 1 memory = 1500.MB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1500.MB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1500.MB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1500.MB retry_strategy { diff --git a/config/F32.config b/config/F32.config index d8b87cf..34b53b0 100644 --- a/config/F32.config +++ b/config/F32.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/config/F4.config b/config/F4.config index 9b9373f..de4956c 100644 --- a/config/F4.config +++ b/config/F4.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/config/F72.config b/config/F72.config index 0268f16..76ea5f5 100644 --- a/config/F72.config +++ b/config/F72.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/config/F8.config b/config/F8.config index c1db89d..dafa3d9 100644 --- a/config/F8.config +++ b/config/F8.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/config/M64.config b/config/M64.config index 84f49bb..d6718d1 100644 --- a/config/M64.config +++ b/config/M64.config @@ -3,7 +3,7 @@ process { cpus = 1 memory = 250.MB } - withName: assess_ReadQuality_FastQC_readgroup { + withName: assess_ReadQualityReadgroups_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -13,7 +13,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_library { + withName: assess_ReadQualityLibraries_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -23,7 +23,7 @@ process { } } } - withName: assess_ReadQuality_FastQC_sample { + withName: assess_ReadQualitySamples_FastQC { cpus = 2 memory = 1.GB retry_strategy { @@ -33,7 +33,7 @@ process { } } } - withName: run_stats_SAMtools_readgroup { + withName: run_statsReadgroups_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -43,7 +43,7 @@ process { } } } - withName: run_stats_SAMtools_library { + withName: run_statsLibraries_SAMtools { cpus = 1 memory = 1.GB retry_strategy { @@ -53,7 +53,7 @@ process { } } } - withName: run_stats_SAMtools_sample { + withName: run_statsSamples_SAMtools { cpus = 1 memory = 1.GB retry_strategy { diff --git a/main.nf b/main.nf index c42b6c9..bad933c 100755 --- a/main.nf +++ b/main.nf @@ -7,37 +7,37 @@ include { run_validate_PipeVal } from './external/pipeline-Nextflow-module/modul main_process: "./" //Save logs in /process-log/run_validate_PipeVal ] ) -include { run_stats_SAMtools as run_stats_SAMtools_readgroup } from './module/stats_samtools' addParams( +include { run_stats_SAMtools as run_statsReadgroups_SAMtools } from './module/stats_samtools' addParams( workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", stat_mode: "readgroup" ) -include { run_stats_SAMtools as run_stats_SAMtools_library } from './module/stats_samtools' addParams( +include { run_stats_SAMtools as run_statsLibraries_SAMtools } from './module/stats_samtools' addParams( workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", stat_mode: "library" ) -include { run_stats_SAMtools as run_stats_SAMtools_sample } from './module/stats_samtools' addParams( +include { run_stats_SAMtools as run_statsSamples_SAMtools } from './module/stats_samtools' addParams( workflow_output_dir: "${params.output_dir_base}/SAMtools-${params.samtools_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/SAMtools-${params.samtools_version}", stat_mode: "sample" ) -include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_readgroup } from './module/fastqc' addParams( +include { assess_ReadQuality_FastQC as assess_ReadQualityReadgroups_FastQC } from './module/fastqc' addParams( workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", stat_mode: "readgroup" ) -include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_library } from './module/fastqc' addParams( +include { assess_ReadQuality_FastQC as assess_ReadQualityLibraries_FastQC } from './module/fastqc' addParams( workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", stat_mode: "library" ) -include { assess_ReadQuality_FastQC as assess_ReadQuality_FastQC_sample } from './module/fastqc' addParams( +include { assess_ReadQuality_FastQC as assess_ReadQualitySamples_FastQC } from './module/fastqc' addParams( workflow_output_dir: "${params.output_dir_base}/FastQC-${params.fastqc_version}", workflow_log_output_dir: "${params.log_output_dir}/process-log/FastQC-${params.fastqc_version}", stat_mode: "sample" @@ -206,29 +206,29 @@ workflow { ) if ('stats' in params.algorithm) { - run_stats_SAMtools_readgroup( + run_statsReadgroups_SAMtools( stats_readgroups_ch ) - run_stats_SAMtools_library( + run_statsLibraries_SAMtools( stats_libraries_ch ) - run_stats_SAMtools_sample( + run_statsSamples_SAMtools( samples_to_process_ch ) } if ('fastqc' in params.algorithm) { if (params.fastqc_level == 'readgroup') { - assess_ReadQuality_FastQC_readgroup( + assess_ReadQualityReadgroups_FastQC( readgroups_to_process_ch ) } else if (params.fastqc_level == 'library') { - assess_ReadQuality_FastQC_library( + assess_ReadQualityLibraries_FastQC( libraries_to_process_ch ) } else if (params.fastqc_level == 'sample') { - assess_ReadQuality_FastQC_sample( + assess_ReadQualitySamples_FastQC( samples_to_process_ch ) } From 9bb4d2f57fb7bce8aae84feb479aa39148d9f1b1 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 27 Jun 2024 15:39:52 -0700 Subject: [PATCH 43/45] rename bamqc_outformat to bamqc_output_format --- README.md | 2 +- config/template.config | 2 +- main.nf | 2 +- module/bamqc_qualimap.nf | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4dbe016..5b1ac3a 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,7 @@ input: #### Qualimap specific configuration | Field | Type | Required | Description | | ----- | ---- | ------------ | ------------------------ | -| bamqc_outformat | string | no | Choice of 'pdf' or 'html', default = 'pdf' | +| bamqc_output_format | string | no | Choice of 'pdf' or 'html', default = 'pdf' | | bamqc_additional_options | string | no | Any additional options recognized by `bamqc` | #### Base resource allocation updaters diff --git a/config/template.config b/config/template.config index ff3bb64..50b54d1 100644 --- a/config/template.config +++ b/config/template.config @@ -27,7 +27,7 @@ params { cwm_additional_options = '' // Qualimap bamqc options - bamqc_outformat = 'pdf' // 'html' or 'pdf' + bamqc_output_format = 'pdf' // 'html' or 'pdf' bamqc_additional_options = '' // FastQC options diff --git a/main.nf b/main.nf index bad933c..af43c65 100755 --- a/main.nf +++ b/main.nf @@ -119,7 +119,7 @@ log.info """\ - Qualimap bamqc options: qualimap_version: ${params.qualimap_version} - bamqc_outformat: ${params.bamqc_outformat} + bamqc_output_format: ${params.bamqc_output_format} bamqc_additional_options: ${params.bamqc_additional_options} """ diff --git a/module/bamqc_qualimap.nf b/module/bamqc_qualimap.nf index 1c447e9..d16fe17 100644 --- a/module/bamqc_qualimap.nf +++ b/module/bamqc_qualimap.nf @@ -34,7 +34,7 @@ process run_bamqc_Qualimap { --java-mem-size=${(task.memory * params.jvm_fraction).getMega()}M \ -bam ${path} \ -outdir ${output_filename}_stats \ - -outformat ${params.bamqc_outformat} \ + -outformat ${params.bamqc_output_format} \ -outfile ${output_filename} \ -nt ${task.cpus} \ -c \ From 2a63bb4f6ad07897e9c8e2049beff940ddfab7f5 Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 27 Jun 2024 15:41:39 -0700 Subject: [PATCH 44/45] rename bamqc_outformat to bamqc_output_format --- test/config/all-tools.config | 2 +- test/config/bamqc.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/config/all-tools.config b/test/config/all-tools.config index d1bcddc..c77e83a 100644 --- a/test/config/all-tools.config +++ b/test/config/all-tools.config @@ -26,7 +26,7 @@ params { cwm_use_fast_algorithm = false // Qualimap bamqc options - bamqc_outformat = 'pdf' + bamqc_output_format = 'pdf' bamqc_additional_options = '' // FastQC options diff --git a/test/config/bamqc.config b/test/config/bamqc.config index 027097b..2fdef1e 100644 --- a/test/config/bamqc.config +++ b/test/config/bamqc.config @@ -14,7 +14,7 @@ params { save_intermediate_files = true // Qualimap bamqc options - bamqc_outformat = 'pdf' + bamqc_output_format = 'pdf' bamqc_additional_options = '' // Base resource allocation updater From 4e1e2fdc234bdfd25441cf8bb82651bab0347e4c Mon Sep 17 00:00:00 2001 From: Sorel Fitz-Gibbon Date: Thu, 27 Jun 2024 15:47:14 -0700 Subject: [PATCH 45/45] remove fastqc as default --- config/schema.yaml | 1 - config/template.config | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/config/schema.yaml b/config/schema.yaml index f7cf09b..868347f 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -17,7 +17,6 @@ algorithm: - collectwgsmetrics - bamqc default: - - fastqc - stats - collectwgsmetrics reference: diff --git a/config/template.config b/config/template.config index 50b54d1..425e1b0 100644 --- a/config/template.config +++ b/config/template.config @@ -8,7 +8,7 @@ includeConfig "${projectDir}/nextflow.config" // Inputs/parameters of the pipeline params { - algorithm = ['fastqc', 'stats', 'collectwgsmetrics'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc' + algorithm = ['stats', 'collectwgsmetrics'] // 'fastqc', 'stats', 'collectwgsmetrics', 'bamqc' reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' output_dir = '/path/to/output/directory' blcds_registered_dataset = false // if you want the output to be registered