diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf index a4b2c2c40c7..17815a03ef1 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/main.nf @@ -2,68 +2,60 @@ // Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals // -include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main' -include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals/main' +include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main' +include { GATK4_MERGEVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main' +include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main' workflow BAM_CREATE_SOM_PON_GATK { take: - ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ] + ch_input // channel: [ val(meta), path(input), path(index) ] ch_fasta // channel: [ val(meta), path(fasta) ] ch_fai // channel: [ val(meta), path(fai) ] ch_dict // channel: [ val(meta), path(dict) ] val_pon_norm // string: name for panel of normals - ch_gendb_intervals // channel: [ path(interval_file) ] + ch_intervals // channel: [mandatory] [ intervals, num_intervals ] main: ch_versions = Channel.empty() - ch_input = ch_mutect2_in // // Perform variant calling for each sample using mutect2 module in panel of normals mode. // - GATK4_MUTECT2 ( - ch_input, - ch_fasta, - ch_fai, - ch_dict, - [], - [], - [], - [] - ) - ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) - // - // Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport. - // - ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList() - ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList() - ch_dict_gendb = ch_dict.map{meta, dict -> return dict}.toList() + // Combine input and intervals for spread and gather strategy + ch_input_intervals = ch_input.combine(ch_intervals) + // Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module + // .map{ meta, input, index, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], input, index, intervals ] } + .map{ meta, input, index, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], input, index, [] ] } - ch_gendb_input = Channel.of([id:val_pon_norm]) - .combine(ch_vcf) - .combine(ch_index) - .combine(ch_gendb_intervals) - .combine(ch_dict_gendb) - .map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]} + GATK4_MUTECT2(ch_input_intervals, ch_fasta, ch_fai, ch_dict, [], [], [], []) - GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false ) - ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) + // GATK4_MERGEVCFS(GATK4_MUTECT2.out.vcf.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple(), ch_dict) - // - //Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals. - // - GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict ) + // vcf_joint = GATK4_MERGEVCFS.out.vcf.map{ meta, vcf -> [[id: 'joint'], vcf]}.groupTuple() + // tbi_joint = GATK4_MERGEVCFS.out.tbi.map{ meta, tbi -> [[id: 'joint'], tbi]}.groupTuple() + + vcf_joint = GATK4_MUTECT2.out.vcf.map{ meta, vcf -> [[id: 'joint'], vcf]}.groupTuple() + tbi_joint = GATK4_MUTECT2.out.tbi.map{ meta, tbi -> [[id: 'joint'], tbi]}.groupTuple() + + ch_genomicsdb_input = vcf_joint.join(tbi_joint).combine(ch_intervals).map{ meta, vcf, tbi, intervals, num_intervals -> [meta, vcf, tbi, intervals, [], [] ]} + + GATK4_GENOMICSDBIMPORT(ch_genomicsdb_input, [], [], []) + GATK4_CREATESOMATICPANELOFNORMALS(GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict) + + ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first()) + // ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first()) + ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first()) ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first()) emit: - mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] - mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] - mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] - genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] - pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] - pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] - - versions = ch_versions // channel: [ path(versions.yml) ] + vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ] + index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ] + stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ] + genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ] + pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ] + pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] } diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml index 26608360ca0..39692b55c12 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml @@ -10,18 +10,18 @@ keywords: - genomicsdb_workspace - panel_of_normals components: + - gatk4/mergevcfs - gatk4/mutect2 - gatk4/genomicsdbimport - gatk4/createsomaticpanelofnormals input: - - ch_mutect2_in: + - ch_input: type: list description: | An input channel containing the following files: - input: One or more BAM/CRAM files - - input_index: The index/indices from the BAM/CRAM file(s) - - interval_file: An interval file to be used with the mutect call - Structure: [ meta, input, input_index, interval_file ] + - index: The index/indices from the BAM/CRAM file(s) + Structure: [ meta, input, index ] - fasta: type: file description: The reference fasta file @@ -39,15 +39,15 @@ output: type: file description: File containing software versions pattern: "versions.yml" - - mutect2_vcf: + - vcf: type: list description: List of compressed vcf files to be used to make the gendb workspace pattern: "[ *.vcf.gz ]" - - mutect2_index: + - index: type: list - description: List of indexes of mutect2_vcf files + description: List of indexes of vcf files pattern: "[ *vcf.gz.tbi ]" - - mutect2_stats: + - stats: type: list description: List of stats files that pair with mutect2_vcf files pattern: "[ *vcf.gz.stats ]" @@ -67,3 +67,4 @@ authors: - "@GCJMackenzie" maintainers: - "@GCJMackenzie" + - "@maxulysse" diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test index 57aee898c3e..0588b8cbff8 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test @@ -10,6 +10,7 @@ nextflow_workflow { tag "subworkflows_nfcore" tag "subworkflows/bam_create_som_pon_gatk" tag "gatk4" + tag "gatk4/mergevcfs" tag "gatk4/mutect2" tag "gatk4/genomicsdbimport" tag "gatk4/createsomaticpanelofnormals" @@ -18,17 +19,17 @@ nextflow_workflow { when { workflow { """ - // ch_mutect2_in + // ch_input input[0] = Channel.of([ [ id:'test1' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), - [] ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ], [ [ id:'test2' ], file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), - [] ] + file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] ) // ch_fasta input[1] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)]) @@ -38,8 +39,8 @@ nextflow_workflow { input[3] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists:true)]) // str_pon_norm input[4] = "test_panel" - // ch_interval_file - input[5] = Channel.value(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true)) + // ch_intervals + input[5] = Channel.value([file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true), 2]) """ } } @@ -47,11 +48,12 @@ nextflow_workflow { then { assertAll( { assert workflow.success }, - { assert snapshot(file(workflow.out.mutect2_vcf.get(0).get(1)).name).match("test1.vcf.gz") }, - { assert snapshot(file(workflow.out.mutect2_index.get(0).get(1)).name).match("test1.vcf.gz.tbi") }, - { assert snapshot(file(workflow.out.mutect2_stats.get(0).get(1)).name).match("test1.vcf.gz.stats") }, - { assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") }, + { assert snapshot(workflow.out.versions).match("versions") }, + { assert snapshot(file(workflow.out.index.get(0).get(1)).name).match("test1.vcf.gz.tbi") }, { assert snapshot(file(workflow.out.pon_index.get(0).get(1)).name).match("test_panel.vcf.gz.tbi") }, + { assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") }, + { assert snapshot(file(workflow.out.stats.get(0).get(1)).name).match("test1.vcf.gz.stats") }, + { assert snapshot(file(workflow.out.vcf.get(0).get(1)).name).match("test1.vcf.gz") } ) } } diff --git a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap index 4c0d88a26da..b2edb01f24c 100644 --- a/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap +++ b/subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test.snap @@ -1,13 +1,27 @@ { "test_panel.vcf.gz": { "content": [ - "test_panel.vcf.gz" + "joint.vcf.gz" ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.0" }, - "timestamp": "2024-02-14T06:59:54.103667303" + "timestamp": "2024-05-15T17:35:09.935561" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,016c722065897238882cf7c871978afd", + "versions.yml:md5,68d32fbcb37e212aa22457de481d76b9", + "versions.yml:md5,d38faba736b5c64157c0c532060730c2" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.0" + }, + "timestamp": "2024-05-15T17:35:09.908215" }, "test1.vcf.gz.stats": { "content": [ @@ -15,19 +29,19 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.0" }, - "timestamp": "2024-02-14T06:59:54.102164313" + "timestamp": "2024-05-15T17:35:09.942514" }, "test_panel.vcf.gz.tbi": { "content": [ - "test_panel.vcf.gz.tbi" + "joint.vcf.gz.tbi" ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.0" }, - "timestamp": "2024-02-14T06:59:54.105382853" + "timestamp": "2024-05-15T17:35:09.931335" }, "test1.vcf.gz": { "content": [ @@ -35,9 +49,9 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.0" }, - "timestamp": "2024-02-14T06:59:54.098085724" + "timestamp": "2024-05-15T17:35:09.948448" }, "test1.vcf.gz.tbi": { "content": [ @@ -45,8 +59,8 @@ ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nextflow": "24.04.0" }, - "timestamp": "2024-02-14T06:59:54.100765684" + "timestamp": "2024-05-15T17:35:09.925845" } } \ No newline at end of file