diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b027f0..1889c243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#104](https://github.com/nf-core/bacass/pull/104) - Added dragonflye module for long-reads assembly + ### `Fixed` ### `Dependencies` diff --git a/README.md b/README.md index 2a10a542..445f6801 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,9 @@ This pipeline is primarily for bacterial assembly of next-generation sequencing ### Long Read Assembly For users that only have Nanopore data, the pipeline quality trims these using [PoreChop](https://github.com/rrwick/Porechop) and assesses basic sequencing QC utilizing [NanoPlot](https://github.com/wdecoster/NanoPlot) and [PycoQC](https://github.com/a-slide/pycoQC). -The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), or [Canu](https://github.com/marbl/canu). Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. +The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)(\*) pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. + +> **\*Note**: Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. 
It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon (default) and Medaka polishers. For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository. ### Hybrid Assembly diff --git a/assets/schema_input.json b/assets/schema_input.json index a34ad666..2b2bbe74 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -73,6 +73,7 @@ }, "GenomeSize": { "errorMessage": "A number (including decimals) ending with 'm', representing genome size. No spaces allowed.", + "meta": ["gsize"], "anyOf": [ { "type": ["string", "null"], diff --git a/conf/modules.config b/conf/modules.config index 702ce9e8..4f39a538 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -94,6 +94,33 @@ process { ] } + withName: 'DRAGONFLYE' { + ext.args = { /* Prepend --gsize from the samplesheet unless it is unset/'NA' or params.dragonflye_args already provides it */ + if ( meta.gsize && !meta.gsize.equals('NA') ){ + if ( !params.dragonflye_args.contains("--gsize") ) { + "--gsize ${meta.gsize} ${params.dragonflye_args}" + } else { + params.dragonflye_args ?: '' + } + } else { + params.dragonflye_args ?: '' + } + } + + publishDir = [ + path: { "${params.outdir}/Dragonflye" }, + mode: params.publish_dir_mode, + pattern: "*.{fa,log}", + saveAs: { filename -> + if (filename.equals('versions.yml')) { + null + } else { + "${meta.id}.${filename}" + } + } + ] + } + withName: 'RACON' { ext.args = '' publishDir = [ diff --git a/conf/test_long_dragonflye.config b/conf/test_long_dragonflye.config new file mode 100644 index 00000000..304fb4d8 --- /dev/null +++ b/conf/test_long_dragonflye.config @@ -0,0 +1,26 @@ +/* +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bacass -profile test_long_dragonflye,<docker/singularity> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test_long_dragonflye profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv' + + // some extra args to speed tests up + prokka_args = " --fast" + assembly_type = 'long' + assembler = 'dragonflye' + skip_kraken2 = true + skip_polish = true +} diff --git a/docs/output.md b/docs/output.md index 4c58ec20..ba44aa38 100644 --- a/docs/output.md +++ b/docs/output.md @@ -146,6 +146,12 @@ Check out the [Canu documentation](https://canu.readthedocs.io/en/latest/index.h Check out the [Miniasm documentation](https://github.com/lh3/miniasm) for more information on Miniasm output. +- `Dragonflye/` + - `*.contigs.fa`: Assembly in Fasta format + - `*.dragonflye.log`: Log file containing the report of the dragonflye process + +Check out the [Dragonflye](https://github.com/rpetit3/dragonflye) documentation for more information on the Dragonflye output. 
+ ### Polished assemblies diff --git a/modules.json b/modules.json index 1eda682c..205b7a79 100644 --- a/modules.json +++ b/modules.json @@ -25,6 +25,11 @@ "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", "installed_by": ["modules"] }, + "dragonflye": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, "fastp": { "branch": "master", "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", diff --git a/modules/nf-core/dragonflye/environment.yml b/modules/nf-core/dragonflye/environment.yml new file mode 100644 index 00000000..8a7ad456 --- /dev/null +++ b/modules/nf-core/dragonflye/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::dragonflye=1.0.11 diff --git a/modules/nf-core/dragonflye/main.nf b/modules/nf-core/dragonflye/main.nf new file mode 100644 index 00000000..bc3527a7 --- /dev/null +++ b/modules/nf-core/dragonflye/main.nf @@ -0,0 +1,41 @@ +process DRAGONFLYE { + tag "$meta.id" + label 'process_medium' + + conda 'modules/nf-core/dragonflye/environment.yml' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/dragonflye:1.0.11--hdfd78af_0' : + 'biocontainers/dragonflye:1.0.11--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("contigs.fa") , emit: contigs + tuple val(meta), path("dragonflye.log") , emit: log + tuple val(meta), path("{flye,miniasm,raven}.fasta") , emit: raw_contigs + tuple val(meta), path("{miniasm,raven}-unpolished.gfa"), optional:true , emit: gfa + tuple val(meta), path("flye-info.txt"), optional:true , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def memory = task.memory.toGiga() + """ + dragonflye \\ + --reads ${reads} \\ + $args \\ + --cpus $task.cpus \\ + --ram $memory \\ + --outdir ./ \\ + --force + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragonflye: \$(dragonflye --version 2>&1 | sed 's/^.*dragonflye //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragonflye/meta.yml b/modules/nf-core/dragonflye/meta.yml new file mode 100644 index 00000000..13b9ad66 --- /dev/null +++ b/modules/nf-core/dragonflye/meta.yml @@ -0,0 +1,56 @@ +name: dragonflye +description: Assemble bacterial isolate genomes from Nanopore reads +keywords: + - bacterial + - assembly + - nanopore +tools: + - dragonflye: + description: Microbial assembly pipeline for Nanopore reads + homepage: https://github.com/rpetit3/dragonflye + documentation: https://github.com/rpetit3/dragonflye/blob/main/README.md + licence: ["GPL v2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Input Nanopore FASTQ file + pattern: "*.fastq.gz" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - contigs: + type: file + description: The final assembly produced by Dragonflye + pattern: "contigs.fa" + - log: + type: file + description: Full log file for bug reporting + pattern: "dragonflye.log" + - raw_contigs: + type: file + description: Raw assembly produced by the assembler (Flye, Miniasm, or Raven) + pattern: "{flye,miniasm,raven}.fasta" + - txt: + type: file + description: Assembly information output by Flye + pattern: "flye-info.txt" + - gfa: + type: file + description: Assembly graph produced by Miniasm, or Raven + pattern: "{miniasm,raven}-unpolished.gfa" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/dragonflye/tests/main.nf.test b/modules/nf-core/dragonflye/tests/main.nf.test new file mode 100644 index 00000000..1eadc7f4 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process DRAGONFLYE" + script "../main.nf" + process "DRAGONFLYE" + tag "modules" + tag "modules_nfcore" + tag "dragonflye" + + + test("Dragonflye with miniasm") { + config "./nextflow.miniasm.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.raw_contigs).match("miniasm_raw_contigs") }, + { assert snapshot(process.out.gfa).match("miniasm_gfa") }, + { assert snapshot(process.out.versions).match("versions") }, + // MD5sum not reproducible (timestamp, contig order) + { assert new File("${outputDir}/dragonflye/contigs.fa").exists() }, + { assert new File("${outputDir}/dragonflye/dragonflye.log").exists() } + + ) + } + + } + + + + test("Dragonflye with 
raven") { + config "./nextflow.raven.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + // MD5sum not reproducible (timestamp, contig order) + { assert new File("${outputDir}/dragonflye/contigs.fa").exists() }, + { assert new File("${outputDir}/dragonflye/dragonflye.log").exists() }, + { assert new File("${outputDir}/dragonflye/raven.fasta").exists() }, + { assert new File("${outputDir}/dragonflye/raven-unpolished.gfa").exists() }, + + ) + } + + } + + +} diff --git a/modules/nf-core/dragonflye/tests/main.nf.test.snap b/modules/nf-core/dragonflye/tests/main.nf.test.snap new file mode 100644 index 00000000..64acac41 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,96447a7a742e9ea4f497dd4d19bf5d1b" + ] + ], + "timestamp": "2023-10-19T08:04:24.882463835" + }, + "miniasm_raw_contigs": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "miniasm.fasta:md5,6b8903ba09592df99f43ed05fda488f6" + ] + ] + ], + "timestamp": "2023-10-19T08:04:24.843252417" + }, + "miniasm_gfa": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "miniasm-unpolished.gfa:md5,40ab03a417eafab0cb4ac2c32bd006e1" + ] + ] + ], + "timestamp": "2023-10-19T08:04:24.863920486" + } +} \ No newline at end of file diff --git a/modules/nf-core/dragonflye/tests/nextflow.miniasm.config b/modules/nf-core/dragonflye/tests/nextflow.miniasm.config new file mode 100644 index 00000000..2ab6dcbe --- /dev/null +++ b/modules/nf-core/dragonflye/tests/nextflow.miniasm.config @@ -0,0 +1,5 @@ +process { + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = '--assembler miniasm --gsize 5000000' +} + diff --git a/modules/nf-core/dragonflye/tests/nextflow.raven.config b/modules/nf-core/dragonflye/tests/nextflow.raven.config new file mode 100644 index 00000000..b971e01e --- /dev/null +++ b/modules/nf-core/dragonflye/tests/nextflow.raven.config @@ -0,0 +1,5 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = '--assembler raven --gsize 5000000' +} + diff --git a/modules/nf-core/dragonflye/tests/tags.yml b/modules/nf-core/dragonflye/tests/tags.yml new file mode 100644 index 00000000..d737a914 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/tags.yml @@ -0,0 +1,2 @@ +dragonflye: + - modules/nf-core/dragonflye/** diff --git a/nextflow.config b/nextflow.config index 48520b15..fdf6d8c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,11 +20,12 @@ params { kraken2db = "" // Assembly parameters - assembler = 'unicycler' // Allowed: ['unicycler', 'canu', 'miniasm'] + assembler = 'unicycler' // Allowed: ['unicycler', 'canu', 'miniasm', 'dragonflye'] assembly_type = 'short' // Allowed: ['short', 'long', 'hybrid'] (hybrid works only with Unicycler) unicycler_args = "" canu_mode = '-nanopore' // Allowed: ['-pacbio', '-nanopore', '-pacbio-hifi'] canu_args = '' // Default no extra options, can be adjusted by the user + dragonflye_args = '' // Assembly polishing polish_method = 'medaka' @@ -195,12 +196,13 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_dfast { includeConfig 'conf/test_dfast.config' } - test_hybrid { includeConfig 'conf/test_hybrid.config' } - test_long { includeConfig 'conf/test_long.config' } - test_long_miniasm { includeConfig 'conf/test_long_miniasm.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_dfast { 
includeConfig 'conf/test_dfast.config' } + test_hybrid { includeConfig 'conf/test_hybrid.config' } + test_long { includeConfig 'conf/test_long.config' } + test_long_miniasm { includeConfig 'conf/test_long_miniasm.config' } + test_long_dragonflye{ includeConfig 'conf/test_long_dragonflye.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b416b30..2eb706e0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -87,7 +87,8 @@ "type": "string", "default": "unicycler", "fa_icon": "fas fa-puzzle-piece", - "description": "The assembler to use for assembly. Available options are `Unicycler`, `Canu`, `Miniasm`. The latter two are only available for long-read data, whereas Unicycler can be used for short or hybrid assembly projects." + "description": "The assembler to use for assembly. Available options are `Unicycler`, `Canu`, `Miniasm`, or `Dragonflye`. The latter three are only available for long-read data, whereas Unicycler can be used for short or hybrid assembly projects.", + "enum": ["unicycler", "canu", "miniasm", "dragonflye"] }, "assembly_type": { "type": "string", @@ -111,6 +112,11 @@ "type": "string", "fa_icon": "fas fa-ship", "description": "This can be used to supply [extra options](https://canu.readthedocs.io/en/latest/quick-start.html) to the Canu assembler. Will be ignored when other assemblers are used." + }, + "dragonflye_args": { + "type": "string", + "description": "Extra arguments for [Dragonflye](https://github.com/rpetit3/dragonflye#usage)", + "help_text": "This advanced option allows you to add extra arguments to Dragonflye (e.g.: `\"--gsize 2.4m\"`). For those arguments with no values/options associated (e.g.: `\"--nopolish\"` or `\"--nofilter\"`...) you need to add an extra space at the beginning of the input string to params.dragonflye_args. 
Example: --dragonflye_args ' --nopolish'" } } }, diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 4a6d03a9..708ce8c4 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -81,6 +81,7 @@ include { MINIMAP2_ALIGN } from '../modules/nf-core/minim include { MINIMAP2_ALIGN as MINIMAP2_CONSENSUS } from '../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_POLISH } from '../modules/nf-core/minimap2/align/main' include { MINIASM } from '../modules/nf-core/miniasm/main' +include { DRAGONFLYE } from '../modules/nf-core/dragonflye/main' include { RACON } from '../modules/nf-core/racon/main' include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' @@ -115,10 +116,10 @@ workflow BACASS { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // def criteria = multiMapCriteria { - meta, fastq_1, fastq_2, long_fastq, fast5, genome_size -> - shortreads: fastq_1 != 'NA' ? tuple(tuple(meta, [fastq_1, fastq_2])) : null - longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null - fast5: fast5 != 'NA' ? tuple(meta, fast5) : null + meta, fastq_1, fastq_2, long_fastq, fast5 -> + shortreads: fastq_1 != 'NA' ? tuple(meta, [fastq_1, fastq_2]) : null + longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null + fast5: fast5 != 'NA' ? 
tuple(meta, fast5) : null } // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ Channel @@ -220,7 +221,7 @@ workflow BACASS { } // - // ASSEMBLY: Unicycler, Canu, Miniasm + // ASSEMBLY: Unicycler, Canu, Miniasm, Dragonflye // ch_assembly = Channel.empty() @@ -289,7 +290,18 @@ workflow BACASS { ch_for_racon ) ch_assembly = ch_assembly.mix( RACON.out.improved_assembly.dump(tag: 'miniasm') ) - ch_versions = ch_versions.mix(RACON.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix( RACON.out.versions.ifEmpty(null) ) + } + + // + // MODULE: Dragonflye, genome assembly, long reads + // + if( params.assembler == 'dragonflye' ){ + DRAGONFLYE( + ch_for_assembly.map { meta, sr, lr -> tuple(meta, lr) } + ) + ch_assembly = ch_assembly.mix( DRAGONFLYE.out.contigs.dump(tag: 'dragonflye') ) + ch_versions = ch_versions.mix( DRAGONFLYE.out.versions.ifEmpty(null) ) } // @@ -391,14 +403,23 @@ workflow BACASS { ch_quast_multiqc = QUAST.out.tsv ch_versions = ch_versions.mix(QUAST.out.versions.ifEmpty(null)) + // Check assemblies that require further processing for gene annotation + ch_assembly + .branch{ meta, fasta -> + gzip: fasta.name.endsWith('.gz') + skip: true + } + .set{ ch_assembly_for_gunzip } + // // MODULE: PROKKA, gene annotation // ch_prokka_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'prokka' ) { - GUNZIP ( ch_assembly ) - ch_to_prokka = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + // Uncompress assembly for annotation if necessary + GUNZIP ( ch_assembly_for_gunzip.gzip ) + ch_to_prokka = ch_assembly_for_gunzip.skip.mix( GUNZIP.out.gunzip ) + ch_versions = ch_versions.mix( GUNZIP.out.versions.ifEmpty(null) ) PROKKA ( ch_to_prokka, @@ -412,19 +433,18 @@ workflow BACASS { // // MODULE: BAKTA, gene annotation // - ch_bakta_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'bakta' ) { - GUNZIP ( 
ch_assembly ) - ch_to_bakta = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + // Uncompress assembly for annotation if necessary + GUNZIP ( ch_assembly_for_gunzip.gzip ) + ch_to_bakta = ch_assembly_for_gunzip.skip.mix( GUNZIP.out.gunzip ) + ch_versions = ch_versions.mix( GUNZIP.out.versions.ifEmpty(null) ) BAKTA_DBDOWNLOAD_RUN ( ch_to_bakta, params.baktadb, params.baktadb_download ) - ch_bakta_txt_multiqc = BAKTA_DBDOWNLOAD_RUN.out.bakta_txt_multiqc.collect() ch_versions = ch_versions.mix(BAKTA_DBDOWNLOAD_RUN.out.versions) }