Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Subworkflow - PON creation - Handle intervals + refactor #3675

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 34 additions & 42 deletions subworkflows/nf-core/bam_create_som_pon_gatk/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,60 @@
// Run GATK mutect2, genomicsdbimport and createsomaticpanelofnormals
//

include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main'
include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main'
include { GATK4_CREATESOMATICPANELOFNORMALS } from '../../../modules/nf-core/gatk4/createsomaticpanelofnormals/main'
include { GATK4_GENOMICSDBIMPORT } from '../../../modules/nf-core/gatk4/genomicsdbimport/main'
include { GATK4_MERGEVCFS } from '../../../modules/nf-core/gatk4/mergevcfs/main'
include { GATK4_MUTECT2 } from '../../../modules/nf-core/gatk4/mutect2/main'

workflow BAM_CREATE_SOM_PON_GATK {
take:
ch_mutect2_in // channel: [ val(meta), path(input), path(input_index), path(interval_file) ]
ch_input // channel: [ val(meta), path(input), path(index) ]
ch_fasta // channel: [ val(meta), path(fasta) ]
ch_fai // channel: [ val(meta), path(fai) ]
ch_dict // channel: [ val(meta), path(dict) ]
val_pon_norm // string: name for panel of normals
ch_gendb_intervals // channel: [ path(interval_file) ]
ch_intervals // channel: [mandatory] [ intervals, num_intervals ]

main:
ch_versions = Channel.empty()
ch_input = ch_mutect2_in

//
// Perform variant calling for each sample using mutect2 module in panel of normals mode.
//
GATK4_MUTECT2 (
ch_input,
ch_fasta,
ch_fai,
ch_dict,
[],
[],
[],
[]
)
ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first())

//
// Convert all sample vcfs into a genomicsdb workspace using genomicsdbimport.
//
ch_vcf = GATK4_MUTECT2.out.vcf.collect{it[1]}.toList()
ch_index = GATK4_MUTECT2.out.tbi.collect{it[1]}.toList()
ch_dict_gendb = ch_dict.map{meta, dict -> return dict}.toList()
// Combine input and intervals for spread and gather strategy
ch_input_intervals = ch_input.combine(ch_intervals)
// Move num_intervals to meta map and reorganize channel for MUTECT2_PAIRED module
// .map{ meta, input, index, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], input, index, intervals ] }
.map{ meta, input, index, intervals, num_intervals -> [ meta + [ num_intervals:num_intervals ], input, index, [] ] }

ch_gendb_input = Channel.of([id:val_pon_norm])
.combine(ch_vcf)
.combine(ch_index)
.combine(ch_gendb_intervals)
.combine(ch_dict_gendb)
.map{meta, vcf, tbi, interval, dict -> [meta, vcf, tbi, interval, [], dict]}
GATK4_MUTECT2(ch_input_intervals, ch_fasta, ch_fai, ch_dict, [], [], [], [])

GATK4_GENOMICSDBIMPORT ( ch_gendb_input, false, false, false )
ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first())
// GATK4_MERGEVCFS(GATK4_MUTECT2.out.vcf.map{ meta, vcf -> [ groupKey(meta, meta.num_intervals), vcf ] }.groupTuple(), ch_dict)

//
//Panel of normals made from genomicsdb workspace using createsomaticpanelofnormals.
//
GATK4_CREATESOMATICPANELOFNORMALS ( GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict )
// vcf_joint = GATK4_MERGEVCFS.out.vcf.map{ meta, vcf -> [[id: 'joint'], vcf]}.groupTuple()
// tbi_joint = GATK4_MERGEVCFS.out.tbi.map{ meta, tbi -> [[id: 'joint'], tbi]}.groupTuple()

vcf_joint = GATK4_MUTECT2.out.vcf.map{ meta, vcf -> [[id: 'joint'], vcf]}.groupTuple()
tbi_joint = GATK4_MUTECT2.out.tbi.map{ meta, tbi -> [[id: 'joint'], tbi]}.groupTuple()

ch_genomicsdb_input = vcf_joint.join(tbi_joint).combine(ch_intervals).map{ meta, vcf, tbi, intervals, num_intervals -> [meta, vcf, tbi, intervals, [], [] ]}

GATK4_GENOMICSDBIMPORT(ch_genomicsdb_input, [], [], [])
GATK4_CREATESOMATICPANELOFNORMALS(GATK4_GENOMICSDBIMPORT.out.genomicsdb, ch_fasta, ch_fai, ch_dict)

ch_versions = ch_versions.mix(GATK4_MUTECT2.out.versions.first())
// ch_versions = ch_versions.mix(GATK4_MERGEVCFS.out.versions.first())
ch_versions = ch_versions.mix(GATK4_GENOMICSDBIMPORT.out.versions.first())
ch_versions = ch_versions.mix(GATK4_CREATESOMATICPANELOFNORMALS.out.versions.first())

emit:
mutect2_vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ]
mutect2_index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ]
mutect2_stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ]
genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ]
pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ]
pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ]

versions = ch_versions // channel: [ path(versions.yml) ]
vcf = GATK4_MUTECT2.out.vcf // channel: [ val(meta), path(vcf) ]
index = GATK4_MUTECT2.out.tbi // channel: [ val(meta), path(tbi) ]
stats = GATK4_MUTECT2.out.stats // channel: [ val(meta), path(stats) ]
genomicsdb = GATK4_GENOMICSDBIMPORT.out.genomicsdb // channel: [ val(meta), path(genomicsdb) ]
pon_vcf = GATK4_CREATESOMATICPANELOFNORMALS.out.vcf // channel: [ val(meta), path(vcf) ]
pon_index = GATK4_CREATESOMATICPANELOFNORMALS.out.tbi // channel: [ val(meta), path(tbi) ]

versions = ch_versions // channel: [ path(versions.yml) ]
}
17 changes: 9 additions & 8 deletions subworkflows/nf-core/bam_create_som_pon_gatk/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,18 @@ keywords:
- genomicsdb_workspace
- panel_of_normals
components:
- gatk4/mergevcfs
- gatk4/mutect2
- gatk4/genomicsdbimport
- gatk4/createsomaticpanelofnormals
input:
- ch_mutect2_in:
- ch_input:
type: list
description: |
An input channel containing the following files:
- input: One or more BAM/CRAM files
- input_index: The index/indices from the BAM/CRAM file(s)
- interval_file: An interval file to be used with the mutect call
Structure: [ meta, input, input_index, interval_file ]
- index: The index/indices from the BAM/CRAM file(s)
Structure: [ meta, input, index ]
- fasta:
type: file
description: The reference fasta file
Expand All @@ -39,15 +39,15 @@ output:
type: file
description: File containing software versions
pattern: "versions.yml"
- mutect2_vcf:
- vcf:
type: list
description: List of compressed vcf files to be used to make the gendb workspace
pattern: "[ *.vcf.gz ]"
- mutect2_index:
- index:
type: list
description: List of indexes of mutect2_vcf files
description: List of indexes of vcf files
pattern: "[ *vcf.gz.tbi ]"
- mutect2_stats:
- stats:
type: list
description: List of stats files that pair with mutect2_vcf files
pattern: "[ *vcf.gz.stats ]"
Expand All @@ -67,3 +67,4 @@ authors:
- "@GCJMackenzie"
maintainers:
- "@GCJMackenzie"
- "@maxulysse"
24 changes: 13 additions & 11 deletions subworkflows/nf-core/bam_create_som_pon_gatk/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ nextflow_workflow {
tag "subworkflows_nfcore"
tag "subworkflows/bam_create_som_pon_gatk"
tag "gatk4"
tag "gatk4/mergevcfs"
tag "gatk4/mutect2"
tag "gatk4/genomicsdbimport"
tag "gatk4/createsomaticpanelofnormals"
Expand All @@ -18,17 +19,17 @@ nextflow_workflow {
when {
workflow {
"""
// ch_mutect2_in
// ch_input
input[0] = Channel.of([
[ id:'test1' ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true),
[] ],
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
],
[
[ id:'test2' ],
file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true),
file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true),
[] ]
file(params.modules_testdata_base_path+ 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true)
]
)
// ch_fasta
input[1] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true)])
Expand All @@ -38,20 +39,21 @@ nextflow_workflow {
input[3] = Channel.value([ [ id:'genome' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.dict', checkIfExists:true)])
// str_pon_norm
input[4] = "test_panel"
// ch_interval_file
input[5] = Channel.value(file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true))
// ch_intervals
input[5] = Channel.value([file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/multi_intervals.bed', checkIfExists: true), 2])
"""
}
}

then {
assertAll(
{ assert workflow.success },
{ assert snapshot(file(workflow.out.mutect2_vcf.get(0).get(1)).name).match("test1.vcf.gz") },
{ assert snapshot(file(workflow.out.mutect2_index.get(0).get(1)).name).match("test1.vcf.gz.tbi") },
{ assert snapshot(file(workflow.out.mutect2_stats.get(0).get(1)).name).match("test1.vcf.gz.stats") },
{ assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") },
{ assert snapshot(workflow.out.versions).match("versions") },
{ assert snapshot(file(workflow.out.index.get(0).get(1)).name).match("test1.vcf.gz.tbi") },
{ assert snapshot(file(workflow.out.pon_index.get(0).get(1)).name).match("test_panel.vcf.gz.tbi") },
{ assert snapshot(file(workflow.out.pon_vcf.get(0).get(1)).name).match("test_panel.vcf.gz") },
{ assert snapshot(file(workflow.out.stats.get(0).get(1)).name).match("test1.vcf.gz.stats") },
{ assert snapshot(file(workflow.out.vcf.get(0).get(1)).name).match("test1.vcf.gz") }
)
}
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,66 @@
{
"test_panel.vcf.gz": {
"content": [
"test_panel.vcf.gz"
"joint.vcf.gz"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.0"
},
"timestamp": "2024-02-14T06:59:54.103667303"
"timestamp": "2024-05-15T17:35:09.935561"
},
"versions": {
"content": [
[
"versions.yml:md5,016c722065897238882cf7c871978afd",
"versions.yml:md5,68d32fbcb37e212aa22457de481d76b9",
"versions.yml:md5,d38faba736b5c64157c0c532060730c2"
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.0"
},
"timestamp": "2024-05-15T17:35:09.908215"
},
"test1.vcf.gz.stats": {
"content": [
"test1.vcf.gz.stats"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.0"
},
"timestamp": "2024-02-14T06:59:54.102164313"
"timestamp": "2024-05-15T17:35:09.942514"
},
"test_panel.vcf.gz.tbi": {
"content": [
"test_panel.vcf.gz.tbi"
"joint.vcf.gz.tbi"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.0"
},
"timestamp": "2024-02-14T06:59:54.105382853"
"timestamp": "2024-05-15T17:35:09.931335"
},
"test1.vcf.gz": {
"content": [
"test1.vcf.gz"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.0"
},
"timestamp": "2024-02-14T06:59:54.098085724"
"timestamp": "2024-05-15T17:35:09.948448"
},
"test1.vcf.gz.tbi": {
"content": [
"test1.vcf.gz.tbi"
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
"nextflow": "24.04.0"
},
"timestamp": "2024-02-14T06:59:54.100765684"
"timestamp": "2024-05-15T17:35:09.925845"
}
}
Loading