Skip to content

Commit

Permalink
Add new module: Flye (#1164)
Browse files Browse the repository at this point in the history
* changing mv by gzip

* changing mv by gzip

* first module creation

* add test.yml

* add flye to pytest_modules.yml

* update flye module

* delete functions.nf

* generate test.yml

* fix contains from test.yml

* test file assembly_info.txt with regex

* check that file contains at least contig_1

* fix typo in contains

* update version

* split fastq file for raw runs

* use asm-coverage to reduce memory usage

* fix module name error

* add genome-size

* decrease coverage

* change test data for raw runs

* add coverage and genome size

* Apply comments from code review

Co-authored-by: SusiJo <[email protected]>

* after many tries, add a stub run

* remove md5sum for stub run

* Apply suggestions from code review

Co-authored-by: James A. Fellows Yates <[email protected]>

* fix review comments

* Apply suggestions from code review

Co-authored-by: SusiJo <[email protected]>

* no hardcoded version in stub run

* Update modules/flye/main.nf

Co-authored-by: Mahesh Binzer-Panchal <[email protected]>

Co-authored-by: SusiJo <[email protected]>
Co-authored-by: James A. Fellows Yates <[email protected]>
Co-authored-by: Mahesh Binzer-Panchal <[email protected]>
  • Loading branch information
4 people committed Jun 30, 2022
1 parent 031fbd3 commit bd0fa88
Show file tree
Hide file tree
Showing 6 changed files with 302 additions and 0 deletions.
68 changes: 68 additions & 0 deletions modules/flye/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// Runs the Flye assembler on one sample's reads in the requested read-type mode,
// then compresses/renames Flye's fixed-name outputs so they carry the sample prefix.
//
// Inputs:
//   meta  - sample map; `meta.id` is used for the task tag and the default prefix
//   reads - read file(s) handed to flye after the mode flag
//   mode  - one of the Flye read-type flags listed in `valid_mode` below
// Outputs: gzipped assembly FASTA, GFA and GV graphs, assembly info (txt),
//          Flye log, params JSON, and versions.yml.
process FLYE {
    tag "$meta.id"
    label 'process_high'

    conda (params.enable_conda ? "bioconda::flye=2.9" : null)
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' :
        'quay.io/biocontainers/flye:2.9--py39h6935b12_1' }"

    input:
    tuple val(meta), path(reads)
    val mode

    output:
    tuple val(meta), path("*.fasta.gz"), emit: fasta
    tuple val(meta), path("*.gfa.gz")  , emit: gfa
    tuple val(meta), path("*.gv.gz")   , emit: gv
    tuple val(meta), path("*.txt")     , emit: txt
    tuple val(meta), path("*.log")     , emit: log
    tuple val(meta), path("*.json")    , emit: json
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Fail before launching flye if the caller passed a flag that is not in this list.
    def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"]
    if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" }
    """
    flye \\
        $mode \\
        $reads \\
        --out-dir . \\
        --threads \\
        $task.cpus \\
        $args

    gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz
    gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz
    gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz
    mv assembly_info.txt ${prefix}.assembly_info.txt
    mv flye.log ${prefix}.flye.log
    mv params.json ${prefix}.params.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        flye: \$( flye --version )
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Placeholder outputs with the same names as the real run.
    // The gzipped files are produced by piping straight into gzip: the previous
    // `echo stub > file | gzip -c file` form started both sides of the pipe at
    // once, so gzip could try to read the file before it had been written.
    """
    echo stub | gzip -c > ${prefix}.assembly.fasta.gz
    echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz
    echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz
    echo contig_1 > ${prefix}.assembly_info.txt
    echo stub > ${prefix}.flye.log
    echo stub > ${prefix}.params.json

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        flye: \$( flye --version )
    END_VERSIONS
    """
}
69 changes: 69 additions & 0 deletions modules/flye/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Module metadata for FLYE: describes the tool, the process inputs and the emitted outputs.
name: "flye"
description: De novo assembler for single molecule sequencing reads
keywords:
  - assembly
  - genome
  - de novo
  - genome assembler
  - single molecule
tools:
  - "flye":
      description: "Fast and accurate de novo assembler for single molecule sequencing reads"
      homepage: "https://github.com/fenderglass/Flye"
      documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md"
      tool_dev_url: "https://github.com/fenderglass/Flye"
      doi: "10.1038/s41592-020-00971-x"
      # A real YAML list, not a quoted string that merely looks like one.
      licence: ["BSD-3-clause"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - reads:
      type: file
      description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ format.
      pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}"
  - mode:
      # The process receives this as a plain string flag (`val mode`).
      type: string
      description: Flye mode depending on the input data (source and error rate)
      pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ id:'test' ]
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - fasta:
      type: file
      description: Assembled FASTA file
      pattern: "*.fasta.gz"
  - gfa:
      type: file
      description: Repeat graph in gfa format
      pattern: "*.gfa.gz"
  - gv:
      type: file
      description: Repeat graph in gv format
      pattern: "*.gv.gz"
  - txt:
      type: file
      description: Extra information and statistics about resulting contigs
      pattern: "*.txt"
  - log:
      type: file
      description: Flye log file
      pattern: "*.log"
  - json:
      type: file
      description: Flye parameters
      pattern: "*.json"

authors:
  - "@mirpedrol"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,10 @@ flash:
- modules/flash/**
- tests/modules/flash/**

# Tag `flye`: glob patterns covering the Flye module and its test directory.
# NOTE(review): presumably CI uses these globs to decide when the flye tests run — confirm.
flye:
- modules/flye/**
- tests/modules/flye/**

# Tag `freebayes`: glob patterns covering the freebayes module and its test directory.
freebayes:
- modules/freebayes/**
- tests/modules/freebayes/**
Expand Down
71 changes: 71 additions & 0 deletions tests/modules/flye/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { FLYE } from '../../../modules/flye/main.nf'

// Runs FLYE in `--pacbio-raw` mode on a single test sample.
workflow test_flye_pacbio_raw {

    // Sample description and the read file looked up in params.test_data.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--pacbio-raw"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}

// Runs FLYE in `--pacbio-corr` mode on a single test sample.
workflow test_flye_pacbio_corr {

    // Sample description and the read file looked up in params.test_data.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--pacbio-corr"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}

// Runs FLYE in `--pacbio-hifi` mode on a single test sample.
workflow test_flye_pacbio_hifi {

    // Sample description and the read file looked up in params.test_data.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--pacbio-hifi"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}

// Runs FLYE in `--nano-raw` mode on a single test sample.
workflow test_flye_nano_raw {

    // Sample description and the read file looked up in params.test_data.
    // NOTE(review): this nanopore-mode test reads the ['pacbio']['hifi'] entry — confirm that is intended.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--nano-raw"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}

// Runs FLYE in `--nano-corr` mode on a single test sample.
workflow test_flye_nano_corr {

    // Sample description and the read file looked up in params.test_data.
    // NOTE(review): this nanopore-mode test reads the ['pacbio']['hifi'] entry — confirm that is intended.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--nano-corr"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}

// Runs FLYE in `--nano-hq` mode on a single test sample.
workflow test_flye_nano_hq {

    // Sample description and the read file looked up in params.test_data.
    // NOTE(review): this nanopore-mode test reads the ['pacbio']['hifi'] entry — confirm that is intended.
    sample_meta = [ id:'test' ]
    reads_file  = file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true)

    // Read-type flag passed as the process's second input.
    flye_mode = "--nano-hq"

    FLYE ( [ sample_meta, reads_file ], flye_mode )
}
5 changes: 5 additions & 0 deletions tests/modules/flye/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
process {

    // Publish each task's outputs under <params.outdir>/<tool>, where <tool> is the
    // lower-cased text before the first '_' in the last ':'-separated part of the process name.
    publishDir = {
        def simple_name = task.process.tokenize(':')[-1]
        def tool_name   = simple_name.tokenize('_')[0].toLowerCase()
        "${params.outdir}/${tool_name}"
    }

}
85 changes: 85 additions & 0 deletions tests/modules/flye/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# According to the issue https://github.com/fenderglass/Flye/issues/164
# Some fluctuations are expected because of the heuristics
# Here we check that test.assembly_info.txt contains at least one contig

# Stub run of the pacbio-raw entry: output files must exist and
# assembly_info.txt must mention contig_1; no checksums are compared.
- name: 'flye test_flye_pacbio_raw'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_pacbio_raw
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
    -stub-run
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'

# Full run of the pacbio-corr entry: output files must exist, assembly_info.txt
# must mention contig_1, and params.json is compared by md5sum.
- name: 'flye test_flye_pacbio_corr'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_pacbio_corr
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'
      md5sum: '54b576cb6d4d27656878a7fd3657bde9'

# Full run of the pacbio-hifi entry: output files must exist, assembly_info.txt
# must mention contig_1, and params.json is compared by md5sum.
- name: 'flye test_flye_pacbio_hifi'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_pacbio_hifi
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'
      md5sum: '54b576cb6d4d27656878a7fd3657bde9'

# Stub run of the nano-raw entry: output files must exist and
# assembly_info.txt must mention contig_1; no checksums are compared.
- name: 'flye test_flye_nano_raw'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_nano_raw
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
    -stub-run
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'

# Full run of the nano-corr entry: output files must exist, assembly_info.txt
# must mention contig_1, and params.json is compared by md5sum.
- name: 'flye test_flye_nano_corr'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_nano_corr
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'
      md5sum: '54b576cb6d4d27656878a7fd3657bde9'

# Full run of the nano-hq entry: output files must exist, assembly_info.txt
# must mention contig_1, and params.json is compared by md5sum.
- name: 'flye test_flye_nano_hq'
  command: >-
    nextflow run ./tests/modules/flye
    -entry test_flye_nano_hq
    -c ./tests/config/nextflow.config
    -c ./tests/modules/flye/nextflow.config
  tags:
    - 'flye'
  files:
    - path: 'output/flye/test.assembly.fasta.gz'
    - path: 'output/flye/test.assembly_graph.gfa.gz'
    - path: 'output/flye/test.assembly_graph.gv.gz'
    - path: 'output/flye/test.assembly_info.txt'
      contains:
        - 'contig_1'
    - path: 'output/flye/test.flye.log'
    - path: 'output/flye/test.params.json'
      md5sum: '54b576cb6d4d27656878a7fd3657bde9'

0 comments on commit bd0fa88

Please sign in to comment.