From 06e76ca0a67780ed48aee8f1d04dec8987e33568 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Wed, 25 Oct 2023 17:24:03 +0200 Subject: [PATCH 01/12] add nfcore module dragonfyle --- conf/modules.config | 16 ++++ modules.json | 5 ++ modules/nf-core/dragonflye/environment.yml | 6 ++ modules/nf-core/dragonflye/main.nf | 41 ++++++++++ modules/nf-core/dragonflye/meta.yml | 56 +++++++++++++ modules/nf-core/dragonflye/tests/main.nf.test | 78 +++++++++++++++++++ .../dragonflye/tests/main.nf.test.snap | 38 +++++++++ .../dragonflye/tests/nextflow.miniasm.config | 5 ++ .../dragonflye/tests/nextflow.raven.config | 5 ++ modules/nf-core/dragonflye/tests/tags.yml | 2 + nextflow.config | 3 +- nextflow_schema.json | 7 +- workflows/bacass.nf | 17 +++- 13 files changed, 275 insertions(+), 4 deletions(-) create mode 100644 modules/nf-core/dragonflye/environment.yml create mode 100644 modules/nf-core/dragonflye/main.nf create mode 100644 modules/nf-core/dragonflye/meta.yml create mode 100644 modules/nf-core/dragonflye/tests/main.nf.test create mode 100644 modules/nf-core/dragonflye/tests/main.nf.test.snap create mode 100644 modules/nf-core/dragonflye/tests/nextflow.miniasm.config create mode 100644 modules/nf-core/dragonflye/tests/nextflow.raven.config create mode 100644 modules/nf-core/dragonflye/tests/tags.yml diff --git a/conf/modules.config b/conf/modules.config index 702ce9e8..c52b73db 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -94,6 +94,22 @@ process { ] } + withName: 'DRAGONFLYE' { + ext.args = params.dragonflye_args ? 
params.dragonflye_args : '' + publishDir = [ + path: { "${params.outdir}/Dragonflye" }, + mode: params.publish_dir_mode, + pattern: '*.fa', + saveAs: { filename -> + if (filename.equals('versions.yml')) { + null + } else { + "${meta.id}.${filename}" + } + } + ] + } + withName: 'RACON' { ext.args = '' publishDir = [ diff --git a/modules.json b/modules.json index 87f35378..6e809cac 100644 --- a/modules.json +++ b/modules.json @@ -26,6 +26,11 @@ "git_sha": "05c280924b6c768d484c7c443dad5e605c4ff4b4", "installed_by": ["modules"] }, + "dragonflye": { + "branch": "master", + "git_sha": "516189e968feb4ebdd9921806988b4c12b4ac2dc", + "installed_by": ["modules"] + }, "fastp": { "branch": "master", "git_sha": "d497a4868ace3302016ea8ed4b395072d5e833cd", diff --git a/modules/nf-core/dragonflye/environment.yml b/modules/nf-core/dragonflye/environment.yml new file mode 100644 index 00000000..8a7ad456 --- /dev/null +++ b/modules/nf-core/dragonflye/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::dragonflye=1.0.11 diff --git a/modules/nf-core/dragonflye/main.nf b/modules/nf-core/dragonflye/main.nf new file mode 100644 index 00000000..bc3527a7 --- /dev/null +++ b/modules/nf-core/dragonflye/main.nf @@ -0,0 +1,41 @@ +process DRAGONFLYE { + tag "$meta.id" + label 'process_medium' + + conda 'modules/nf-core/dragonflye/environment.yml' + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/dragonflye:1.0.11--hdfd78af_0' : + 'biocontainers/dragonflye:1.0.11--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("contigs.fa") , emit: contigs + tuple val(meta), path("dragonflye.log") , emit: log + tuple val(meta), path("{flye,miniasm,raven}.fasta") , emit: raw_contigs + tuple val(meta), path("{miniasm,raven}-unpolished.gfa"), optional:true , emit: gfa + tuple val(meta), path("flye-info.txt"), optional:true , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def memory = task.memory.toGiga() + """ + dragonflye \\ + --reads ${reads} \\ + $args \\ + --cpus $task.cpus \\ + --ram $memory \\ + --outdir ./ \\ + --force + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + dragonflye: \$(dragonflye --version 2>&1 | sed 's/^.*dragonflye //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/dragonflye/meta.yml b/modules/nf-core/dragonflye/meta.yml new file mode 100644 index 00000000..13b9ad66 --- /dev/null +++ b/modules/nf-core/dragonflye/meta.yml @@ -0,0 +1,56 @@ +name: dragonflye +description: Assemble bacterial isolate genomes from Nanopore reads +keywords: + - bacterial + - assembly + - nanopore +tools: + - dragonflye: + description: Microbial assembly pipeline for Nanopore reads + homepage: https://github.com/rpetit3/dragonflye + documentation: https://github.com/rpetit3/dragonflye/blob/main/README.md + licence: ["GPL v2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Input Nanopore FASTQ file + pattern: "*.fastq.gz" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - contigs: + type: file + description: The final assembly produced by Dragonflye + pattern: "contigs.fa" + - log: + type: file + description: Full log file for bug reporting + pattern: "dragonflye.log" + - raw_contigs: + type: file + description: Raw assembly produced by the assembler (Flye, Miniasm, or Raven) + pattern: "{flye,miniasm,raven}.fasta" + - txt: + type: file + description: Assembly information output by Flye + pattern: "flye-info.txt" + - gfa: + type: file + description: Assembly graph produced by Miniasm, or Raven + pattern: "{miniasm,raven}-unpolished.gfa" +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/dragonflye/tests/main.nf.test b/modules/nf-core/dragonflye/tests/main.nf.test new file mode 100644 index 00000000..1eadc7f4 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/main.nf.test @@ -0,0 +1,78 @@ +nextflow_process { + + name "Test Process DRAGONFLYE" + script "../main.nf" + process "DRAGONFLYE" + tag "modules" + tag "modules_nfcore" + tag "dragonflye" + + + test("Dragonflye with miniasm") { + config "./nextflow.miniasm.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.raw_contigs).match("miniasm_raw_contigs") }, + { assert snapshot(process.out.gfa).match("miniasm_gfa") }, + { assert snapshot(process.out.versions).match("versions") }, + // MD5sum not reproducible (timestamp, contig order) + { assert new File("${outputDir}/dragonflye/contigs.fa").exists() }, + { assert new File("${outputDir}/dragonflye/dragonflye.log").exists() } + + ) + } + + } + + + + test("Dragonflye with 
raven") { + config "./nextflow.raven.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + + input[0] = [ [ id:'test', single_end:true ], // meta map + [ file("https://github.com/nf-core/test-datasets/raw/bacass/nanopore/subset15000.fq.gz", checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + // MD5sum not reproducible (timestamp, contig order) + { assert new File("${outputDir}/dragonflye/contigs.fa").exists() }, + { assert new File("${outputDir}/dragonflye/dragonflye.log").exists() }, + { assert new File("${outputDir}/dragonflye/raven.fasta").exists() }, + { assert new File("${outputDir}/dragonflye/raven-unpolished.gfa").exists() }, + + ) + } + + } + + +} diff --git a/modules/nf-core/dragonflye/tests/main.nf.test.snap b/modules/nf-core/dragonflye/tests/main.nf.test.snap new file mode 100644 index 00000000..64acac41 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/main.nf.test.snap @@ -0,0 +1,38 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,96447a7a742e9ea4f497dd4d19bf5d1b" + ] + ], + "timestamp": "2023-10-19T08:04:24.882463835" + }, + "miniasm_raw_contigs": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "miniasm.fasta:md5,6b8903ba09592df99f43ed05fda488f6" + ] + ] + ], + "timestamp": "2023-10-19T08:04:24.843252417" + }, + "miniasm_gfa": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "miniasm-unpolished.gfa:md5,40ab03a417eafab0cb4ac2c32bd006e1" + ] + ] + ], + "timestamp": "2023-10-19T08:04:24.863920486" + } +} \ No newline at end of file diff --git a/modules/nf-core/dragonflye/tests/nextflow.miniasm.config b/modules/nf-core/dragonflye/tests/nextflow.miniasm.config new file mode 100644 index 00000000..2ab6dcbe --- /dev/null +++ b/modules/nf-core/dragonflye/tests/nextflow.miniasm.config @@ -0,0 +1,5 @@ +process { + publishDir = { 
"${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = '--assembler miniasm --gsize 5000000' +} + diff --git a/modules/nf-core/dragonflye/tests/nextflow.raven.config b/modules/nf-core/dragonflye/tests/nextflow.raven.config new file mode 100644 index 00000000..b971e01e --- /dev/null +++ b/modules/nf-core/dragonflye/tests/nextflow.raven.config @@ -0,0 +1,5 @@ +process { + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + ext.args = '--assembler raven --gsize 5000000' +} + diff --git a/modules/nf-core/dragonflye/tests/tags.yml b/modules/nf-core/dragonflye/tests/tags.yml new file mode 100644 index 00000000..d737a914 --- /dev/null +++ b/modules/nf-core/dragonflye/tests/tags.yml @@ -0,0 +1,2 @@ +dragonflye: + - modules/nf-core/dragonflye/** diff --git a/nextflow.config b/nextflow.config index 48520b15..5ae7fcc2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,11 +20,12 @@ params { kraken2db = "" // Assembly parameters - assembler = 'unicycler' // Allowed: ['unicycler', 'canu', 'miniasm'] + assembler = 'unicycler' // Allowed: ['unicycler', 'canu', 'miniasm', 'dragonflye'] assembly_type = 'short' // Allowed: ['short', 'long', 'hybrid'] (hybrid works only with Unicycler) unicycler_args = "" canu_mode = '-nanopore' // Allowed: ['-pacbio', '-nanopore', '-pacbio-hifi'] canu_args = '' // Default no extra options, can be adjusted by the user + dragonflye_args = '' // Assembly polishing polish_method = 'medaka' diff --git a/nextflow_schema.json b/nextflow_schema.json index 6b416b30..31d65e92 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -87,7 +87,8 @@ "type": "string", "default": "unicycler", "fa_icon": "fas fa-puzzle-piece", - "description": "The assembler to use for assembly. Available options are `Unicycler`, `Canu`, `Miniasm`. The latter two are only available for long-read data, whereas Unicycler can be used for short or hybrid assembly projects." 
+ "description": "The assembler to use for assembly. Available options are `Unicycler`, `Canu`, `Miniasm`, or `Dragonflye`. The latter three are only available for long-read data, whereas Unicycler can be used for short or hybrid assembly projects.", + "enum": ["unicycler", "canu", "miniasm", "dragonflye"] }, "assembly_type": { "type": "string", @@ -111,6 +112,10 @@ "type": "string", "fa_icon": "fas fa-ship", "description": "This can be used to supply [extra options](https://canu.readthedocs.io/en/latest/quick-start.html) to the Canu assembler. Will be ignored when other assemblers are used." + }, + "dragonflye_args": { + "type": "string", + "description": "Extra arguments for [Dragonflye](https://github.com/rpetit3/dragonflye#usage)" } } }, diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 4a6d03a9..c80f216a 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -81,6 +81,7 @@ include { MINIMAP2_ALIGN } from '../modules/nf-core/minim include { MINIMAP2_ALIGN as MINIMAP2_CONSENSUS } from '../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_POLISH } from '../modules/nf-core/minimap2/align/main' include { MINIASM } from '../modules/nf-core/miniasm/main' +include { DRAGONFLYE } from '../modules/nf-core/dragonflye/main' include { RACON } from '../modules/nf-core/racon/main' include { SAMTOOLS_SORT } from '../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../modules/nf-core/samtools/index/main' @@ -220,7 +221,7 @@ workflow BACASS { } // - // ASSEMBLY: Unicycler, Canu, Miniasm + // ASSEMBLY: Unicycler, Canu, Miniasm, Dragonflye // ch_assembly = Channel.empty() @@ -289,7 +290,18 @@ workflow BACASS { ch_for_racon ) ch_assembly = ch_assembly.mix( RACON.out.improved_assembly.dump(tag: 'miniasm') ) - ch_versions = ch_versions.mix(RACON.out.versions.ifEmpty(null)) + ch_versions = ch_versions.mix( RACON.out.versions.ifEmpty(null) ) + } + + // + // MODULE: Dragonflye, genome assembly, long reads + // + if( 
params.assembler == 'dragonflye' ){ + DRAGONFLYE( + ch_for_assembly.map { meta, sr, lr -> tuple(meta, lr) } + ) + ch_assembly = ch_assembly.mix( DRAGONFLYE.out.contigs.dump(tag: 'dragonflye') ) + ch_versions = ch_versions.mix( DRAGONFLYE.out.versions.ifEmpty(null) ) } // @@ -390,6 +402,7 @@ workflow BACASS { ) ch_quast_multiqc = QUAST.out.tsv ch_versions = ch_versions.mix(QUAST.out.versions.ifEmpty(null)) + ch_to_quast.view() // // MODULE: PROKKA, gene annotation From 296bb394054c23a2ec5aff8fbd927ba0002c1d18 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:26:01 +0200 Subject: [PATCH 02/12] create test for assembly with dragonflye --- conf/test_long_dragonflye.config | 26 ++++++++++++++++++++++++++ nextflow.config | 13 +++++++------ 2 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 conf/test_long_dragonflye.config diff --git a/conf/test_long_dragonflye.config b/conf/test_long_dragonflye.config new file mode 100644 index 00000000..304fb4d8 --- /dev/null +++ b/conf/test_long_dragonflye.config @@ -0,0 +1,26 @@ +/* +======================================================================================== + Nextflow config file for running minimal tests +======================================================================================== + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/bacass -profile test_long_dragonflye,<docker/singularity> + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test_long_dragonfyle profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/bacass/bacass_long_miniasm.tsv' + + // some extra args to speed tests up + prokka_args = " --fast" + assembly_type = 'long' + assembler = 'dragonflye' + skip_kraken2 = true + skip_polish = true +} diff --git a/nextflow.config b/nextflow.config index 5ae7fcc2..fdf6d8c4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,12 +196,13 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_dfast { includeConfig 'conf/test_dfast.config' } - test_hybrid { includeConfig 'conf/test_hybrid.config' } - test_long { includeConfig 'conf/test_long.config' } - test_long_miniasm { includeConfig 'conf/test_long_miniasm.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_dfast { includeConfig 'conf/test_dfast.config' } + test_hybrid { includeConfig 'conf/test_hybrid.config' } + test_long { includeConfig 'conf/test_long.config' } + test_long_miniasm { includeConfig 'conf/test_long_miniasm.config' } + test_long_dragonflye{ includeConfig 'conf/test_long_dragonflye.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile From e27d4c1c2c24f92b357ef004a71095532ba44bd1 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:36:15 +0200 Subject: [PATCH 03/12] tmp fix for handling gzip/no-gzip assemblies --- workflows/bacass.nf | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/workflows/bacass.nf 
b/workflows/bacass.nf index c80f216a..50534136 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -296,6 +296,7 @@ workflow BACASS { // // MODULE: Dragonflye, genome assembly, long reads // + // TODO: Allow pipeline to get the GenomeSize and input this as params.dragonflye_args = "--gsize $genomeSize" if( params.assembler == 'dragonflye' ){ DRAGONFLYE( ch_for_assembly.map { meta, sr, lr -> tuple(meta, lr) } @@ -402,16 +403,21 @@ workflow BACASS { ) ch_quast_multiqc = QUAST.out.tsv ch_versions = ch_versions.mix(QUAST.out.versions.ifEmpty(null)) - ch_to_quast.view() // // MODULE: PROKKA, gene annotation // ch_prokka_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'prokka' ) { - GUNZIP ( ch_assembly ) - ch_to_prokka = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + // Uncompress assembly for annotation if necessary + if( !ch_assembly.map{ it[1].endsWith('.gz') }.any() ){ // FIXME: This should't be ""!"". + GUNZIP ( ch_assembly ) + ch_to_prokka = GUNZIP.out.gunzip + ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + } else { + ch_assembly + .set{ ch_to_prokka } + } PROKKA ( ch_to_prokka, @@ -425,12 +431,17 @@ workflow BACASS { // // MODULE: BAKTA, gene annotation // - ch_bakta_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'bakta' ) { - GUNZIP ( ch_assembly ) - ch_to_bakta = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + // Uncompress assembly for annotation if necessary + if( !ch_assembly.map{ it[1].endsWith('.gz')}.any() ){ // FIXME: This should't be ""!"". 
+ GUNZIP ( ch_assembly ) + ch_to_bakta = GUNZIP.out.gunzip + ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) + } else { + ch_assembly + .set{ ch_to_bakta } + } BAKTA_DBDOWNLOAD_RUN ( ch_to_bakta, From 8a8d754bbe567ec2fdbf9c26688f2915a4969980 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:44:04 +0200 Subject: [PATCH 04/12] add genome_size value in the input file to meta map --- assets/schema_input.json | 1 + workflows/bacass.nf | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index a34ad666..2b2bbe74 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -73,6 +73,7 @@ }, "GenomeSize": { "errorMessage": "A number (including decimals) ending with 'm', representing genome size. No spaces allowed.", + "meta": ["gsize"], "anyOf": [ { "type": ["string", "null"], diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 50534136..e02ee79f 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -116,10 +116,10 @@ workflow BACASS { // SUBWORKFLOW: Read in samplesheet, validate and stage input files // def criteria = multiMapCriteria { - meta, fastq_1, fastq_2, long_fastq, fast5, genome_size -> - shortreads: fastq_1 != 'NA' ? tuple(tuple(meta, [fastq_1, fastq_2])) : null - longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null - fast5: fast5 != 'NA' ? tuple(meta, fast5) : null + meta, fastq_1, fastq_2, long_fastq, fast5 -> + shortreads: fastq_1 != 'NA' ? tuple(meta, [fastq_1, fastq_2]) : null + longreads: long_fastq != 'NA' ? tuple(meta, long_fastq) : null + fast5: fast5 != 'NA' ? 
tuple(meta, fast5) : null } // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/ Channel From 2a442cdfcd8f0c4388641ac8d8184d86f7ab619c Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:45:09 +0200 Subject: [PATCH 05/12] add genome_size value to ext.args in dragonflye --- conf/modules.config | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index c52b73db..858236b4 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -95,7 +95,14 @@ process { } withName: 'DRAGONFLYE' { - ext.args = params.dragonflye_args ? params.dragonflye_args : '' + ext.args = { + if( ${meta.gsize} != 'NA' && !params.dragonflye_args ) { + "--gsize ${meta.gsize}" + } else { + params.dragonflye_args ? params.dragonflye_args : '' + } + } + publishDir = [ path: { "${params.outdir}/Dragonflye" }, mode: params.publish_dir_mode, From 7a529d5a7482030bc8fa7f58b7c027a378131e8b Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:46:28 +0200 Subject: [PATCH 06/12] add dragonfly documentation plus fixme message in workflow --- README.md | 5 ++++- docs/output.md | 3 +++ workflows/bacass.nf | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2a10a542..2ad8a040 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,10 @@ This pipeline is primarily for bacterial assembly of next-generation sequencing ### Long Read Assembly For users that only have Nanopore data, the pipeline quality trims these using [PoreChop](https://github.com/rrwick/Porechop) and assesses basic sequencing QC utilizing [NanoPlot](https://github.com/wdecoster/NanoPlot) and [PycoQC](https://github.com/a-slide/pycoQC). 
-The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), or [Canu](https://github.com/marbl/canu). Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. +The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)* pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. + + +> ***Note**: Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon(default) and Medaka polishers. For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository. ### Hybrid Assembly diff --git a/docs/output.md b/docs/output.md index 4c58ec20..af7c37d5 100644 --- a/docs/output.md +++ b/docs/output.md @@ -146,6 +146,9 @@ Check out the [Canu documentation](https://canu.readthedocs.io/en/latest/index.h Check out the [Miniasm documentation](https://github.com/lh3/miniasm) for more information on Miniasm output. 
+- `Dragonflye/` + - `*.contigs.fa`: Assembly in Fasta format + ### Polished assemblies diff --git a/workflows/bacass.nf b/workflows/bacass.nf index e02ee79f..28086580 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -410,7 +410,7 @@ workflow BACASS { ch_prokka_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'prokka' ) { // Uncompress assembly for annotation if necessary - if( !ch_assembly.map{ it[1].endsWith('.gz') }.any() ){ // FIXME: This should't be ""!"". + if( !ch_assembly.map{ it[1].endsWith('.gz') }.any() ){ // FIXME: Not works with dragonflye output. GUNZIP ( ch_assembly ) ch_to_prokka = GUNZIP.out.gunzip ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) @@ -434,7 +434,7 @@ workflow BACASS { ch_bakta_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'bakta' ) { // Uncompress assembly for annotation if necessary - if( !ch_assembly.map{ it[1].endsWith('.gz')}.any() ){ // FIXME: This should't be ""!"". + if( ch_assembly.map{ it[1].endsWith('.gz')}.any() ){ // FIXME: Not works with dragonflye output. GUNZIP ( ch_assembly ) ch_to_bakta = GUNZIP.out.gunzip ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) From 1dab52e7f7d7a4e3b75c17cf00a50b5db4de57c8 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Fri, 27 Oct 2023 17:51:52 +0200 Subject: [PATCH 07/12] fix prettier --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2ad8a040..7cd33be0 100644 --- a/README.md +++ b/README.md @@ -30,10 +30,9 @@ This pipeline is primarily for bacterial assembly of next-generation sequencing ### Long Read Assembly For users that only have Nanopore data, the pipeline quality trims these using [PoreChop](https://github.com/rrwick/Porechop) and assesses basic sequencing QC utilizing [NanoPlot](https://github.com/wdecoster/NanoPlot) and [PycoQC](https://github.com/a-slide/pycoQC). 
-The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)* pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. +The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)\* pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. - -> ***Note**: Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon(default) and Medaka polishers. For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository. +> **\*Note**: Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon(default) and Medaka polishers. For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository. 
### Hybrid Assembly From 795a419234429f9719173c7a151564b48baf295c Mon Sep 17 00:00:00 2001 From: Dani VM Date: Sun, 29 Oct 2023 00:04:42 +0200 Subject: [PATCH 08/12] fix processing of gziped assemblies for gene annotation --- workflows/bacass.nf | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 28086580..4c37c1f2 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -404,20 +404,23 @@ workflow BACASS { ch_quast_multiqc = QUAST.out.tsv ch_versions = ch_versions.mix(QUAST.out.versions.ifEmpty(null)) + // Check assemblies that require further processing for gene annotation + ch_assembly + .branch{ meta, fasta -> + gzip: fasta.name.endsWith('.gz') + skip: true + } + .set{ ch_assembly_for_gunzip } + // // MODULE: PROKKA, gene annotation // ch_prokka_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'prokka' ) { // Uncompress assembly for annotation if necessary - if( !ch_assembly.map{ it[1].endsWith('.gz') }.any() ){ // FIXME: Not works with dragonflye output. - GUNZIP ( ch_assembly ) - ch_to_prokka = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) - } else { - ch_assembly - .set{ ch_to_prokka } - } + GUNZIP ( ch_assembly_for_gunzip.gzip ) + ch_to_prokka = ch_assembly_for_gunzip.skip.mix( GUNZIP.out.gunzip ) + ch_versions = ch_versions.mix( GUNZIP.out.versions.ifEmpty(null) ) PROKKA ( ch_to_prokka, @@ -434,21 +437,15 @@ workflow BACASS { ch_bakta_txt_multiqc = Channel.empty() if ( !params.skip_annotation && params.annotation_tool == 'bakta' ) { // Uncompress assembly for annotation if necessary - if( ch_assembly.map{ it[1].endsWith('.gz')}.any() ){ // FIXME: Not works with dragonflye output. 
- GUNZIP ( ch_assembly ) - ch_to_bakta = GUNZIP.out.gunzip - ch_versions = ch_versions.mix(GUNZIP.out.versions.ifEmpty(null)) - } else { - ch_assembly - .set{ ch_to_bakta } - } + GUNZIP ( ch_assembly_for_gunzip.gzip ) + ch_to_bakta = ch_assembly_for_gunzip.skip.mix( GUNZIP.out.gunzip ) + ch_versions = ch_versions.mix( GUNZIP.out.versions.ifEmpty(null) ) BAKTA_DBDOWNLOAD_RUN ( ch_to_bakta, params.baktadb, params.baktadb_download ) - ch_bakta_txt_multiqc = BAKTA_DBDOWNLOAD_RUN.out.bakta_txt_multiqc.collect() ch_versions = ch_versions.mix(BAKTA_DBDOWNLOAD_RUN.out.versions) } From af444ff71ada24488c2a1d482f63f94219685673 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Mon, 30 Oct 2023 14:59:53 +0100 Subject: [PATCH 09/12] fix ext.args and update help text on dragonflye module --- conf/modules.config | 8 ++++++-- nextflow_schema.json | 3 ++- workflows/bacass.nf | 1 - 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 858236b4..d33ea8de 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,8 +96,12 @@ process { withName: 'DRAGONFLYE' { ext.args = { - if( ${meta.gsize} != 'NA' && !params.dragonflye_args ) { - "--gsize ${meta.gsize}" + if ( $meta.gsize || $meta.gsize != 'NA' ){ + if ( !params.dragonflye_args.contains("--gsize") ) { + "--gsize ${meta.gsize} ${params.dragonflye_args}" + } else { + params.dragonflye_args ? params.dragonflye_args : '' + } } else { params.dragonflye_args ? 
params.dragonflye_args : '' } diff --git a/nextflow_schema.json b/nextflow_schema.json index 31d65e92..2eb706e0 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -115,7 +115,8 @@ }, "dragonflye_args": { "type": "string", - "description": "Extra arguments for [Dragonflye](https://github.com/rpetit3/dragonflye#usage)" + "description": "Extra arguments for [Dragonflye](https://github.com/rpetit3/dragonflye#usage)", + "help_text": "This advanced option allows you to add extra arguments to Dragonflye (e.g.: `\"--gsize 2.4m\"`). For those arguments with no values/options associated (e.g.: `\"--nopolish\"` or `\"--nofilter\"`...) you need to add an extra space at the beginning of the input string to params.dragonflye_args. Example: --dragonflye_args ' --nopolish'" } } }, diff --git a/workflows/bacass.nf b/workflows/bacass.nf index 4c37c1f2..708ce8c4 100644 --- a/workflows/bacass.nf +++ b/workflows/bacass.nf @@ -296,7 +296,6 @@ workflow BACASS { // // MODULE: Dragonflye, genome assembly, long reads // - // TODO: Allow pipeline to get the GenomeSize and input this as params.dragonflye_args = "--gsize $genomeSize" if( params.assembler == 'dragonflye' ){ DRAGONFLYE( ch_for_assembly.map { meta, sr, lr -> tuple(meta, lr) } From 2fe2a261a7aab9986e8d5a27f34d25dad1aa8a19 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Mon, 30 Oct 2023 16:58:05 +0100 Subject: [PATCH 10/12] update changelog #104 --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27b027f0..1889c243 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Added` +- [#104](https://github.com/nf-core/bacass/pull/104) - Added dragonflye module for long-reads assembly + ### `Fixed` ### `Dependencies` From aa09a83982f050928f7ee6e5820336b9edfdc34b Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Tue, 31 Oct 2023 17:41:58 +0100 Subject: [PATCH 11/12] fix condition to 
parse gsize in dragonflye module conf --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index d33ea8de..46308d77 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,7 +96,7 @@ process { withName: 'DRAGONFLYE' { ext.args = { - if ( $meta.gsize || $meta.gsize != 'NA' ){ + if ( $meta.gsize && $meta.gsize != 'NA' ){ if ( !params.dragonflye_args.contains("--gsize") ) { "--gsize ${meta.gsize} ${params.dragonflye_args}" } else { From c3ad68acb631f14c009b51b15f11fbede8ef1fc3 Mon Sep 17 00:00:00 2001 From: Daniel-VM Date: Thu, 2 Nov 2023 16:16:08 +0100 Subject: [PATCH 12/12] applied reviewer suggestions #104 --- README.md | 2 +- conf/modules.config | 8 ++++---- docs/output.md | 3 +++ 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 7cd33be0..445f6801 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ This pipeline is primarily for bacterial assembly of next-generation sequencing ### Long Read Assembly For users that only have Nanopore data, the pipeline quality trims these using [PoreChop](https://github.com/rrwick/Porechop) and assesses basic sequencing QC utilizing [NanoPlot](https://github.com/wdecoster/NanoPlot) and [PycoQC](https://github.com/a-slide/pycoQC). -The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)\* pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. 
+The pipeline can then perform long read assembly utilizing [Unicycler](https://github.com/rrwick/Unicycler), [Miniasm](https://github.com/lh3/miniasm) in combination with [Racon](https://github.com/isovic/racon), [Canu](https://github.com/marbl/canu) or [Flye](https://github.com/fenderglass/Flye) by using the [Dragonflye](https://github.com/rpetit3/dragonflye)(\*) pipeline. Long reads assembly can be polished using [Medaka](https://github.com/nanoporetech/medaka) or [NanoPolish](https://github.com/jts/nanopolish) with Fast5 files. > **\*Note**: Dragonflye is a comprehensive pipeline designed for genome assembly of Oxford Nanopore Reads. It facilitates the utilization of Flye (default), Miniasm, and Raven assemblers, along with Racon(default) and Medaka polishers. For more information, visit the [Dragonflye GitHub](https://github.com/rpetit3/dragonflye) repository. diff --git a/conf/modules.config b/conf/modules.config index 46308d77..4f39a538 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -96,21 +96,21 @@ process { withName: 'DRAGONFLYE' { ext.args = { - if ( $meta.gsize && $meta.gsize != 'NA' ){ + if ( meta.gsize && !meta.gsize.equals('NA') ){ // add --gsize only when the samplesheet gives a value other than 'NA' if ( !params.dragonflye_args.contains("--gsize") ) { "--gsize ${meta.gsize} ${params.dragonflye_args}" } else { - params.dragonflye_args ? params.dragonflye_args : '' + params.dragonflye_args ?: '' } } else { - params.dragonflye_args ? 
params.dragonflye_args : '' + params.dragonflye_args ?: '' } } publishDir = [ path: { "${params.outdir}/Dragonflye" }, mode: params.publish_dir_mode, - pattern: '*.fa', + pattern: "*.{fa,log}", saveAs: { filename -> if (filename.equals('versions.yml')) { null diff --git a/docs/output.md b/docs/output.md index af7c37d5..ba44aa38 100644 --- a/docs/output.md +++ b/docs/output.md @@ -148,6 +148,9 @@ Check out the [Miniasm documentation](https://github.com/lh3/miniasm) for more i - `Dragonflye/` - `*.contigs.fa`: Assembly in Fasta format + - `*.dragonflye.log`: Log file containing the report of the dragonflye process + +Check out the [Dragonflye](https://github.com/rpetit3/dragonflye) documentation for more information on the Dragonflye output.