Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flye module #982

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions modules/flye/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Extract name of software tool from process name using $task.process
//
def getSoftwareName(task_process) {
    // The process name is the last ':'-separated element of the fully qualified
    // name; the tool name is its first '_'-separated token, lower-cased.
    def scope_parts  = task_process.tokenize(':')
    def process_name = scope_parts[-1]
    def name_parts   = process_name.tokenize('_')
    return name_parts[0].toLowerCase()
}

//
// Extract name of module from process name using $task.process
//
def getProcessName(task_process) {
    // Keep only the final ':'-separated element, i.e. drop any enclosing workflow scopes.
    def scope_parts = task_process.tokenize(':')
    return scope_parts[-1]
}

//
// Function to initialise default values and to generate a Groovy Map of available options for nf-core modules
//
def initOptions(Map args) {
    // Build the module options map, filling in a default for every key the caller
    // left unset. A null `args` is treated like an empty map so that callers which
    // pass no options at all do not fail with a NullPointerException.
    //
    // Returns a new Map with the keys: args, args2, args3, publish_by_meta,
    // publish_dir, publish_files and suffix.
    def supplied = args ?: [:]

    Map options = [:]
    options.args            = supplied.args ?: ''
    options.args2           = supplied.args2 ?: ''
    options.args3           = supplied.args3 ?: ''
    options.publish_by_meta = supplied.publish_by_meta ?: []
    options.publish_dir     = supplied.publish_dir ?: ''
    // Deliberately no default: saveFiles() distinguishes null from any other value.
    options.publish_files   = supplied.publish_files
    options.suffix          = supplied.suffix ?: ''
    return options
}

//
// Tidy up and join elements of a list to return a path string
//
def getPathFromList(path_list) {
    // Join the non-blank entries of `path_list` into a single '/'-separated path.
    //
    // Groovy truth drops null, empty and whitespace-only entries in one test. The
    // previous `!item?.trim().isEmpty()` threw a NullPointerException on a null
    // entry because safe navigation does not short-circuit the rest of the chain.
    def paths = path_list.findAll { item -> item?.trim() }
    // Trim whitespace, then strip leading and trailing slashes from each entry
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") }
    return paths.join('/')
}

//
// Function to save/publish module results
//
def saveFiles(Map args) {
    // Work out where (relative to the publishDir root) a result file is published.
    //
    // Keys read from `args`: filename, options, publish_dir, meta, publish_by_meta.
    // Returns the relative publish path, or null when the file must not be published.
    def opts     = initOptions(args.options)
    def filename = args.filename
    // First path segment: the user-supplied publish_dir wins over the module default
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless running from pytest workflow
    def pytest_run = System.getenv("NF_CORE_MODULES_TEST")
    if (filename.equals('versions.yml') && !pytest_run) {
        return null
    }

    // Optionally append one path segment per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            if (!args.meta || !(meta_key instanceof String)) {
                continue
            }
            // Fall back to the key itself when the meta map has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Only String values become a segment; anything else is added as ''
            segments.add(segment instanceof String ? segment : '')
        }
    }

    def publish_files = opts.publish_files
    // publish_files unset: publish every file under the assembled path
    if (publish_files == null) {
        return "${getPathFromList(segments)}/$args.filename"
    }
    // publish_files as a Map of suffix -> sub-directory: publish only matching files
    if (publish_files instanceof Map) {
        for (rule in publish_files) {
            if (filename.endsWith(rule.key)) {
                def rule_segments = segments + [ rule.value ]
                return "${getPathFromList(rule_segments)}/$args.filename"
            }
        }
    }
    // Any other publish_files value, or no matching suffix: do not publish
    return null
}
56 changes: 56 additions & 0 deletions modules/flye/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'

// Module-level value for params.options: an empty map, so every option falls back to its default
params.options = [:]
// Expand params.options into a complete options map via initOptions()
options = initOptions(params.options)

process FLYE {
    // Run the Flye assembler on one set of reads.
    //
    // Input : tuple of a meta map (reads meta.id and meta.mode) and the read file(s).
    //         meta.mode is passed to flye as `--<mode>`; the test workflows use
    //         pacbio-raw, pacbio-corr, pacbio-hifi, nano-raw, nano-corr and nano-hq.
    // Output: the renamed Flye result files, each emitted with the meta map, plus versions.yml.
    tag "$meta.id"
    label 'process_high'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    // Channel-qualified spec with a single '=' version pin; the former
    // "flye==2.9--py38h69e0bdc_0" is not a spec conda can resolve.
    conda (params.enable_conda ? "bioconda::flye=2.9" : null)
    // NOTE(review): the Singularity image is the py39 build while the Docker image is
    // the py38 build - confirm which tag exists on both registries and align them.
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/flye:2.9--py39h39abbe0_0"
    } else {
        container "quay.io/biocontainers/flye:2.9--py38h69e0bdc_0"
    }

    input:
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.fasta"), emit: fasta
    tuple val(meta), path("*.gfa")  , emit: gfa
    tuple val(meta), path("*.gv")   , emit: gv
    tuple val(meta), path("*.txt")  , emit: txt
    tuple val(meta), path("*.log")  , emit: log
    tuple val(meta), path("*.json") , emit: json
    path "versions.yml"             , emit: versions

    script:
    def prefix = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    def mode   = meta.mode
    // Fail early with a clear message instead of running `flye --null`
    if (!mode) {
        error "FLYE: meta.mode is not set. Provide the Flye read-type mode in the meta map, e.g. [ id:'test', mode:'pacbio-raw' ]"
    }
    """
    flye \\
        --$mode \\
        $reads \\
        $options.args \\
        --threads $task.cpus \\
        --out-dir ./

    mv assembly.fasta ${prefix}.assembly.fasta
    mv assembly_graph.gfa ${prefix}.assembly_graph.gfa
    mv assembly_graph.gv ${prefix}.assembly_graph.gv
    mv assembly_info.txt ${prefix}.assembly_info.txt
    mv flye.log ${prefix}.flye.log
    mv params.json ${prefix}.params.json

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$( flye --version | sed 's/-b[0-9]*\$//' )
    END_VERSIONS
    """
}
65 changes: 65 additions & 0 deletions modules/flye/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
name: flye
description: De novo assembler for single molecule sequencing reads
keywords:
- assembly
- genome
- de novo
- genome assembler
- single molecule
tools:
- flye:
description: De novo assembler for single molecule sequencing reads using repeat graphs
homepage: https://github.com/fenderglass/Flye
documentation: https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md
tool_dev_url: https://github.com/fenderglass/Flye
doi: 10.1038/s41592-020-00971-x
licence: ['BSD-3-clause']

input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', mode:'pacbio-raw' ]
mahesh-panchal marked this conversation as resolved.
Show resolved Hide resolved
- reads:
type: file
description: Input file in FASTA/FASTQ format.
pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}"

output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', mode:'pacbio-raw' ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- fasta:
type: file
description: Assembled FASTA file
pattern: "*.fasta"
- gfa:
type: file
description: Repeat graph in GFA format
pattern: "*.gfa"
- gv:
type: file
description: Repeat graph in Graphviz (DOT) format
pattern: "*.gv"
- txt:
type: file
description: Extra contig information
pattern: "*.txt"
- log:
type: file
description: Flye log file
pattern: "*.log"
- json:
type: file
description: Flye parameters
pattern: "*.json"

authors:
- "@asthara10"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,10 @@ flash:
- modules/flash/**
- tests/modules/flash/**

flye:
- modules/flye/**
- tests/modules/flye/**

freebayes:
- modules/freebayes/**
- tests/modules/freebayes/**
Expand Down
53 changes: 53 additions & 0 deletions tests/modules/flye/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { FLYE } from '../../../modules/flye/main.nf' addParams( options: [:] )

workflow test_flye_pacbio_raw {
    // Run FLYE in pacbio-raw mode.
    // NOTE(review): the reads come from the nanopore test data set - confirm this pairing is intended.
    def meta  = [ id:'test', mode:'pacbio-raw' ]
    def reads = [ file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}

workflow test_flye_pacbio_corr {
    // Run FLYE in pacbio-corr mode on the homo_sapiens PacBio HiFi test reads.
    def meta  = [ id:'test', mode:'pacbio-corr' ]
    def reads = [ file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}

workflow test_flye_pacbio_hifi {
    // Run FLYE in pacbio-hifi mode on the homo_sapiens PacBio HiFi test reads.
    def meta  = [ id:'test', mode:'pacbio-hifi' ]
    def reads = [ file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}

workflow test_flye_nano_raw {
    // Run FLYE in nano-raw mode on the bacteroides_fragilis nanopore test reads.
    def meta  = [ id:'test', mode:'nano-raw' ]
    def reads = [ file(params.test_data['bacteroides_fragilis']['nanopore']['test_fastq_gz'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}

workflow test_flye_nano_corr {
    // Run FLYE in nano-corr mode.
    // NOTE(review): the reads come from the PacBio HiFi test data set - confirm this pairing is intended.
    def meta  = [ id:'test', mode:'nano-corr' ]
    def reads = [ file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}

workflow test_flye_nano_hq {
    // Run FLYE in nano-hq mode.
    // NOTE(review): the reads come from the PacBio HiFi test data set - confirm this pairing is intended.
    def meta  = [ id:'test', mode:'nano-hq' ]
    def reads = [ file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) ]

    FLYE ( [ meta, reads ] )
}
101 changes: 101 additions & 0 deletions tests/modules/flye/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
- name: flye test_flye_pacbio_raw
command: nextflow run tests/modules/flye -entry test_flye_pacbio_raw -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: 6f71c1083c4f992b5b4228036e2ff3c0
- path: output/flye/test.assembly_graph.gfa
md5sum: 4deb890fc3bf59a9dc3fdcd750dcbd5d
- path: output/flye/test.assembly_graph.gv
md5sum: 0aa137b7bced1574f44196a297bbd1a1
- path: output/flye/test.assembly_info.txt
md5sum: 68eaa8e0cdf6c4c4ac855c510db8c71f
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9

- name: flye test_flye_pacbio_corr
command: nextflow run tests/modules/flye -entry test_flye_pacbio_corr -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: 819d6fb836fa673ddf7a467e5a848b6f
- path: output/flye/test.assembly_graph.gfa
md5sum: ec582080d03db6c171a76db36eb26f10
- path: output/flye/test.assembly_graph.gv
md5sum: 73399f4bce4c48ceda120a845dbb3c8c
- path: output/flye/test.assembly_info.txt
md5sum: a1695c9054cdff5f7a32f9a5294f2ffb
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9

- name: flye test_flye_pacbio_hifi
command: nextflow run tests/modules/flye -entry test_flye_pacbio_hifi -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: a43006081eef11e5f8d0ccf23683364d
- path: output/flye/test.assembly_graph.gfa
md5sum: be04a4a6c55199cbd523b3acdd63ce50
- path: output/flye/test.assembly_graph.gv
md5sum: 22e124521d4eb601ebe799735701f2d2
- path: output/flye/test.assembly_info.txt
md5sum: 506742757032551329626327b332cec8
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9

- name: flye test_flye_nano_raw
command: nextflow run tests/modules/flye -entry test_flye_nano_raw -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: 423606b50dfdf41e59308ba795e7db71
- path: output/flye/test.assembly_graph.gfa
md5sum: 302ac6264e67925a22d3653d38ccb9eb
- path: output/flye/test.assembly_graph.gv
md5sum: b18bd15f95607500d325ea23cf23ab50
- path: output/flye/test.assembly_info.txt
md5sum: 8123dbbfaeb840821302d620831ee489
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9

- name: flye test_flye_nano_corr
command: nextflow run tests/modules/flye -entry test_flye_nano_corr -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: eaad569928775fee0025903670f2084d
- path: output/flye/test.assembly_graph.gfa
md5sum: ab7c0897ab4adcaef345c6d5bf560053
- path: output/flye/test.assembly_graph.gv
md5sum: cbf5dfb40b25ba19faf64c315c23dbc1
- path: output/flye/test.assembly_info.txt
md5sum: 9791a3af96936b69910dc043dbb3ac27
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9

- name: flye test_flye_nano_hq
command: nextflow run tests/modules/flye -entry test_flye_nano_hq -c tests/config/nextflow.config
tags:
- flye
files:
- path: output/flye/test.assembly.fasta
md5sum: 069936228fd00330b689c6638fc68fc2
- path: output/flye/test.assembly_graph.gfa
md5sum: 56e3a79f08303d2aea3533c0c11b8398
- path: output/flye/test.assembly_graph.gv
md5sum: 4dff1ebda5884cf369a6ae902029c52b
- path: output/flye/test.assembly_info.txt
md5sum: 26f699b25ab84ef7b1bf48c89e04e353
- path: output/flye/test.flye.log
- path: output/flye/test.params.json
md5sum: 54b576cb6d4d27656878a7fd3657bde9