From 116b902a6dcb9e6bfdeea8d3c09cba4b95586cef Mon Sep 17 00:00:00 2001 From: Isla_DS <74415379+shesanIsland@users.noreply.github.com> Date: Mon, 25 Mar 2024 16:06:24 +0000 Subject: [PATCH] merge lofreq and ivar results add lofreq module and subworkflow 'LOFREQ_IVAR' --- conf/modules_illumina.config | 65 +++++ modules.json | 245 ++++++++++++++---- .../nf-core/bcftools/merge/environment.yml | 7 + modules/nf-core/bcftools/merge/main.nf | 64 +++++ modules/nf-core/bcftools/merge/meta.yml | 88 +++++++ .../lofreq/callparallel/environment.yml | 7 + modules/nf-core/lofreq/callparallel/main.nf | 41 +++ modules/nf-core/lofreq/callparallel/meta.yml | 60 +++++ subworkflows/local/lofreq_ivar.nf | 102 ++++++++ workflows/illumina.nf | 61 ++++- 10 files changed, 683 insertions(+), 57 deletions(-) create mode 100644 modules/nf-core/bcftools/merge/environment.yml create mode 100644 modules/nf-core/bcftools/merge/main.nf create mode 100644 modules/nf-core/bcftools/merge/meta.yml create mode 100644 modules/nf-core/lofreq/callparallel/environment.yml create mode 100644 modules/nf-core/lofreq/callparallel/main.nf create mode 100644 modules/nf-core/lofreq/callparallel/meta.yml create mode 100644 subworkflows/local/lofreq_ivar.nf diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 05424060..012326af 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -351,6 +351,71 @@ if (!params.skip_variants) { } } + + if (variant_caller == 'lofreq') { + process { + withName: 'LOFREQ_CALLPARALLEL' { + //ext.args = '--call-indels --dindel --viterbi --uniq --vcfplot --cluster' + publishDir = [ + path: { "${params.outdir}/variants/lofreq/calls" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + withName: 'VARIANTS_IVAR:IVAR_VARIANTS' { + ext.args = '-t 0.25 -q 1 -m 10' + ext.args2 = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0' + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'IVAR_VARIANTS_TO_VCF' { + ext.args = params.protocol == 'amplicon' ? '--ignore_strand_bias' : '' + publishDir = [ + path: { "${params.outdir}/variants/ivar/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + } + withName: '.*:.*:VARIANTS_IVAR:BCFTOOLS_SORT' { + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:.*:VARIANTS_IVAR:.*:BCFTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/variants/ivar/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + // CREATE NF PROCESS TO MERGE VCF FILE + withName: 'BCFTOOLS_MERGE' { + publishDir = [ + path: { "${params.outdir}/variants/merged" }, + mode: params.publish_dir_mode, + pattern: '*.{bcf,vcf}{,.gz}', + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + + if (variant_caller == 'ivar') { process { withName: 'IVAR_VARIANTS' { diff --git a/modules.json b/modules.json index 4e7dec6f..a76ad67e 100644 --- a/modules.json +++ b/modules.json @@ -8,257 +8,381 @@ "abacas": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "artic/guppyplex": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "artic/minion": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bandage/image": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/consensus": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/filter": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "bcftools/merge": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": [ + "modules" + ] }, "bcftools/mpileup": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/norm": { "branch": "master", "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/query": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/sort": { "branch": "master", "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", - "installed_by": ["modules"] + "installed_by": [ 
+ "modules" + ] }, "bcftools/stats": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/getfasta": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/maskfasta": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bedtools/merge": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "blast/blastn": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "blast/makeblastdb": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bowtie2/align": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules", "fastq_align_bowtie2"] + "installed_by": [ + "modules", + "fastq_align_bowtie2" + ] }, "bowtie2/build": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "cat/fastq": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "custom/getchromsizes": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "fastp": { "branch": "master", "git_sha": "20a508676f40d0fd3f911ac595af91ec845704c4", - "installed_by": 
["modules"] + "installed_by": [ + "modules" + ] }, "fastqc": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "gunzip": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ivar/consensus": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ivar/trim": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ivar/variants": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "kraken2/kraken2": { "branch": "master", "git_sha": "7c695e0147df1157413e06246d9b0094617d3e6b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] + }, + "lofreq/callparallel": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": [ + "modules" + ] }, "minia": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "def5f182583df0c20f43ec3d4355e8ebd341aaa9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nanoplot": { "branch": "master", "git_sha": "3822e04e49b6d89b7092feb3480d744cb5d9986b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nextclade/datasetget": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nextclade/run": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pangolin": { "branch": "master", 
"git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/collectmultiplemetrics": { "branch": "master", "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "picard/markduplicates": { "branch": "master", "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", - "installed_by": ["modules", "bam_markduplicates_picard"] + "installed_by": [ + "modules", + "bam_markduplicates_picard" + ] }, "plasmidid": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pycoqc": { "branch": "master", "git_sha": "cb8a5428685f490d0295563b1b0c3a239bbe1927", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "quast": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/index": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["bam_markduplicates_picard", "modules", "bam_sort_stats_samtools"] + "installed_by": [ + "bam_markduplicates_picard", + "modules", + "bam_sort_stats_samtools" + ] }, "samtools/sort": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules", "bam_sort_stats_samtools"] + "installed_by": [ + "modules", + "bam_sort_stats_samtools" + ] }, "samtools/stats": { "branch": "master", "git_sha": 
"0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules", "bam_stats_samtools"] + "installed_by": [ + "modules", + "bam_stats_samtools" + ] }, "samtools/view": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "spades": { "branch": "master", "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/bgzip": { "branch": "master", "git_sha": "90294980a903ecebd99ac31d8b6c66af48fa8259", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "tabix/tabix": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "unicycler": { "branch": "master", "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "untar": { "branch": "master", "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "vcflib/vcfuniq": { "branch": "master", "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -267,25 +391,34 @@ "bam_markduplicates_picard": { "branch": "master", "git_sha": "6f1697c121719dedde9e0537b6ed6a9cb8c13583", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "bam_sort_stats_samtools": { "branch": "master", "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2", - "installed_by": ["fastq_align_bowtie2"] + "installed_by": [ + "fastq_align_bowtie2" + ] }, "bam_stats_samtools": { "branch": "master", "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", - "installed_by": ["bam_sort_stats_samtools", "bam_markduplicates_picard"] + "installed_by": [ + "bam_sort_stats_samtools", + "bam_markduplicates_picard" + ] }, "fastq_align_bowtie2": { "branch": "master", 
"git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/merge/environment.yml b/modules/nf-core/bcftools/merge/environment.yml new file mode 100644 index 00000000..55de7cd3 --- /dev/null +++ b/modules/nf-core/bcftools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf new file mode 100644 index 00000000..b85d3fec --- /dev/null +++ b/modules/nf-core/bcftools/merge/main.nf @@ -0,0 +1,64 @@ +process BCFTOOLS_MERGE { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcfs), path(tbis) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + path(bed) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: merged_variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def regions = bed ? "--regions-file $bed" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + + """ + bcftools merge \\ + $args \\ + $regions \\ + --threads $task.cpus \\ + --output ${prefix}.${extension} \\ + $vcfs + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/merge/meta.yml b/modules/nf-core/bcftools/merge/meta.yml new file mode 100644 index 00000000..87707140 --- /dev/null +++ b/modules/nf-core/bcftools/merge/meta.yml @@ -0,0 +1,88 @@ +name: bcftools_merge +description: Merge VCF files +keywords: + - variant calling + - merge + - VCF +tools: + - merge: + description: | + Merge VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: file + description: | + List containing 2 or more vcf files + e.g. [ 'file1.vcf', 'file2.vcf' ] + - tbis: + type: file + description: | + List containing the tbi index files corresponding to the vcfs input files + e.g. [ 'file1.vcf.tbi', 'file2.vcf.tbi' ] + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'genome' ] + - fasta: + type: file + description: "(Optional) The fasta reference file (only necessary for the `--gvcf FILE` parameter)" + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: "(Optional) The fasta reference file index (only necessary for the `--gvcf FILE` parameter)" + pattern: "*.fai" + - bed: + type: file + description: "(Optional) The bed regions to merge on" + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf_gz: + type: file + description: VCF merged output file (bgzipped) => when `--output-type z` is used + pattern: "*.vcf.gz" + - vcf: + type: file + description: VCF merged output file => when `--output-type v` is used + pattern: "*.vcf" + - bcf_gz: + type: file + description: BCF merged output file (bgzipped) => when `--output-type b` is used + pattern: "*.bcf.gz" + - bcf: + type: file + description: BCF merged output file => when `--output-type u` is used + pattern: "*.bcf" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@nvnieuwk" + - "@ramprasadn" diff --git a/modules/nf-core/lofreq/callparallel/environment.yml b/modules/nf-core/lofreq/callparallel/environment.yml new file mode 100644 index 00000000..222d1450 --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/environment.yml @@ -0,0 +1,7 @@ +name: lofreq_callparallel +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::lofreq=2.1.5 diff --git a/modules/nf-core/lofreq/callparallel/main.nf b/modules/nf-core/lofreq/callparallel/main.nf new file mode 100644 index 00000000..8d4f8af6 --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/main.nf @@ 
-0,0 +1,41 @@ +process LOFREQ_CALLPARALLEL { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/lofreq:2.1.5--py38h588ecb2_4' : + 'biocontainers/lofreq:2.1.5--py38h588ecb2_4' }" + + input: + tuple val(meta), path(bam), path(bai), path(intervals) + path fasta + path fai + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def options_intervals = intervals ? "-l ${intervals}" : "" + """ + lofreq \\ + call-parallel \\ + --pp-threads $task.cpus \\ + $args \\ + $options_intervals \\ + -f $fasta \\ + -o ${prefix}.vcf.gz \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + lofreq: \$(echo \$(lofreq version 2>&1) | sed 's/^version: //; s/ *commit.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/lofreq/callparallel/meta.yml b/modules/nf-core/lofreq/callparallel/meta.yml new file mode 100644 index 00000000..bfe862ac --- /dev/null +++ b/modules/nf-core/lofreq/callparallel/meta.yml @@ -0,0 +1,60 @@ +name: lofreq_callparallel +description: It predicts variants using multiple processors +keywords: + - variant calling + - low frequency variant calling + - call + - variants +tools: + - lofreq: + description: Lofreq is a fast and sensitive variant-caller for inferring SNVs and indels from next-generation sequencing data. It's call-parallel programme predicts variants using multiple processors + homepage: https://csb5.github.io/lofreq/ + documentation: https://csb5.github.io/lofreq/ + doi: "10.1093/nar/gks918" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - bai: + type: file + description: BAM index file + pattern: "*.{bai}" + - intervals: + type: file + description: BED file containing target regions for variant calling + pattern: "*.{bed}" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fasta}" + - fai: + type: file + description: Reference genome FASTA index file + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Predicted variants file + pattern: "*.{vcf}" +authors: + - "@kaurravneet4123" + - "@bjohnnyd" +maintainers: + - "@kaurravneet4123" + - "@bjohnnyd" diff --git a/subworkflows/local/lofreq_ivar.nf b/subworkflows/local/lofreq_ivar.nf new file mode 100644 index 00000000..ad8cb538 --- /dev/null +++ b/subworkflows/local/lofreq_ivar.nf @@ -0,0 +1,102 @@ + + + + +// +// SUBWORKFLOW: Call variants with Lofreq, followed by ivar +// + +//include { LOFREQ_INDELQUAL } from '../modules/nf-core/lofreq_indelqual' +include { LOFREQ_CALLPARALLEL } from '../../modules/nf-core/lofreq/callparallel/main' +include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' +include { VARIANTS_IVAR } from './variants_ivar' + + +workflow LOFREQ_IVAR { + take: + lofreq_bam // channel: [ val(meta), [ bam ], [ bai ], [ intervals ] ] + bam // channel: [ val(meta), [ bam ] ] // for ivar + fasta // channel: /path/to/genome.fasta + fai // channel: /path/to/genome.fai + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants + + main: + + 
ch_lofreq_vcf = Channel.empty() + ch_vcf = Channel.empty() // ivar + ch_versions = Channel.empty() + //ch_lofreq_tbi = Channel.empty() + + // + // SUBWORKFLOW: Call variants with Lofreq, followed by ivar + // + // Call Lofreq variants + LOFREQ_CALLPARALLEL ( + lofreq_bam, + fasta, + fai + ) + //ch_lofreq_vcf = LOFREQ_CALLPARALLEL.out.vcf + ch_versions = ch_versions.mix(LOFREQ_CALLPARALLEL.out.versions) + + // + // Call ivar variants by invoking ivar subworkflow + VARIANTS_IVAR ( + bam, + fasta, + fai, + sizes, + gff, + bed, + snpeff_db, + snpeff_config, + ivar_multiqc_header + ) + ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + + // Merge vcf files w BCFTOOLS_MERGE module. TODO(review): 'vcfs' is not defined in this workflow and the module declares four inputs ([meta, vcfs, tbis], [meta2, fasta], [meta3, fai], bed) -- build the joined lofreq+ivar channel before relying on this call + BCFTOOLS_MERGE( + vcfs, + fasta, + fai + ) + ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) + + + + + emit: + // re-exported from the VARIANTS_IVAR subworkflow (its internal processes are not in scope here) + tsv = VARIANTS_IVAR.out.tsv // channel: [ val(meta), [ tsv ] ] + + vcf_orig = VARIANTS_IVAR.out.vcf_orig // channel: [ val(meta), [ vcf ] ] + log_out = VARIANTS_IVAR.out.log_out // channel: [ val(meta), [ log ] ] + multiqc_tsv = VARIANTS_IVAR.out.multiqc_tsv // channel: [ val(meta), [ tsv ] ] + + vcf = VARIANTS_IVAR.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VARIANTS_IVAR.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VARIANTS_IVAR.out.csi // channel: [ val(meta), [ csi ] ] + stats = VARIANTS_IVAR.out.stats // channel: [ val(meta), [ txt ] ] + + snpeff_vcf = VARIANTS_IVAR.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_IVAR.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_IVAR.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_IVAR.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_IVAR.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_IVAR.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_IVAR.out.snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = 
VARIANTS_IVAR.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + + // added from proposed subworkflow + lofreq_vcf = LOFREQ_CALLPARALLEL.out.vcf // channel: [ val(meta), [ vcf ] ] + //lofreq_tbi = LOFREQ_CALLPARALLEL.out.tbi // channel: [ val(meta), [ tbi ] ] + merged_vcf = BCFTOOLS_MERGE.out.merged_variants // channel: [ val(meta), [ vcf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} \ No newline at end of file diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 7d44924f..12f980a9 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -6,7 +6,7 @@ def valid_params = [ protocols : ['metagenomic', 'amplicon'], - variant_callers : ['ivar', 'bcftools'], + variant_callers : ['ivar', 'bcftools', 'lofreq'], consensus_callers : ['ivar', 'bcftools'], assemblers : ['spades', 'unicycler', 'minia'], spades_modes : ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] @@ -65,6 +65,7 @@ include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../mod // include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' +include { LOFREQ_IVAR } from '../subworkflows/local/lofreq_ivar' include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' @@ -89,6 +90,8 @@ include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/mai include { FASTQC } from '../modules/nf-core/fastqc/main' include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/kraken2/kraken2/main' include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics/main' +include { LOFREQ_CALLPARALLEL } from '../modules/nf-core/lofreq/callparallel/main' +include { BCFTOOLS_MERGE } from '../modules/nf-core/bcftools/merge/main' include { CUSTOM_DUMPSOFTWAREVERSIONS } 
from '../modules/nf-core/custom/dumpsoftwareversions/main' include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/mosdepth/main' include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main' @@ -99,6 +102,7 @@ include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -406,6 +410,61 @@ workflow ILLUMINA { } } + + + // SUBWORKFLOW: Call variants with Lofreq + ch_lofreq_vcf = Channel.empty() + //ch_vcf = Channel.empty() + //ch_lofreq_tbi = Channel.empty() + ch_bam + .join(ch_bai, by: [0]) + .map{ meta,bam,bai -> [meta, bam, bai, []] } + .set{ ch_lofreq_bam_bai } + if (!params.skip_variants && variant_caller == 'lofreq') { + LOFREQ_CALLPARALLEL ( + ch_lofreq_bam_bai, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai + ) + ch_lofreq_vcf = LOFREQ_CALLPARALLEL.out.vcf + //ch_lofreq_tbi = LOFREQ_CALLPARALLEL.out.tbi + ch_versions = ch_versions.mix(LOFREQ_CALLPARALLEL.out.versions) + + // SUBWORKFLOW: Call variants with ivar + VARIANTS_IVAR ( + ch_bam, + PREPARE_GENOME.out.fasta, + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.fai : [], + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.chrom_sizes : [], + params.gff ? PREPARE_GENOME.out.gff : [], + (params.protocol == 'amplicon' && params.primer_bed) ? 
PREPARE_GENOME.out.primer_bed : [], + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config, + ch_ivar_variants_header_mqc + ) + ch_vcf = VARIANTS_IVAR.out.vcf + ch_tbi = VARIANTS_IVAR.out.tbi + ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv + ch_bcftools_stats_multiqc = VARIANTS_IVAR.out.stats + ch_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv + ch_snpsift_txt = VARIANTS_IVAR.out.snpsift_txt + ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + + + // MODULE: Merge ivar and lofreq variants. NOTE(review): BCFTOOLS_MERGE declares its inputs as [meta, vcfs, tbis], [meta2, fasta], [meta3, fai], bed, whereas this call passes the lofreq and ivar vcf channels separately followed by fasta and fai -- confirm and rework the channel shapes (and provide a lofreq index) before enabling 'lofreq' + BCFTOOLS_MERGE( + ch_lofreq_vcf, + ch_vcf, + //ch_vcf, + //ch_tbi, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai + ) + ch_merged_vcf = BCFTOOLS_MERGE.out.merged_variants + ch_versions = ch_versions.mix(BCFTOOLS_MERGE.out.versions) + } + + // // SUBWORKFLOW: Call variants with IVar //