Skip to content

Commit

Permalink
Trycycler cluster (#5670)
Browse files Browse the repository at this point in the history
* initial trycycler subsample commit

* Update modules/nf-core/trycycler/subsample/main.nf

to resolve issue with nf-test and empty gzipped files

Co-authored-by: Simon Pearce <[email protected]>

* addressing comments on initial commit, fixed version number, added stub tests

* initial commit for trycycler cluster module

---------

Co-authored-by: Simon Pearce <[email protected]>
  • Loading branch information
watsonar and SPPearce committed May 25, 2024
1 parent 8ead864 commit e788348
Show file tree
Hide file tree
Showing 15 changed files with 734 additions and 0 deletions.
9 changes: 9 additions & 0 deletions modules/nf-core/trycycler/cluster/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the trycycler/cluster module.
name: trycycler_cluster
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned to the same 0.5.3 release that the module's container images use.
  - bioconda::trycycler=0.5.3
56 changes: 56 additions & 0 deletions modules/nf-core/trycycler/cluster/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Runs `trycycler cluster` on a set of assemblies plus the reads they were built from,
// then gzips the per-cluster contig FASTA files written under the output directory.
//
// Configuration hooks (all optional, read from `task.ext`):
//   ext.args   - extra options appended to `trycycler cluster`
//   ext.args2  - extra options passed to `gzip`
//   ext.prefix - name of the output directory (defaults to `meta.id`)
process TRYCYCLER_CLUSTER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trycycler:0.5.3--pyhdfd78af_0':
        'biocontainers/trycycler:0.5.3--pyhdfd78af_0' }"

    input:
    // contigs may be one file or a list of files; all are passed to --assemblies.
    tuple val(meta), path(contigs), path(reads)

    output:
    // NOTE: the bare "*" glob collects everything the task creates in its work
    // directory, so this channel carries versions.yml alongside the
    // ${prefix} output directory.
    tuple val(meta), path("*") , emit: cluster_dir
    path "versions.yml"        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    """
    trycycler \\
        cluster \\
        $args \\
        --assemblies ${contigs} \\
        --reads ${reads} \\
        --threads $task.cpus \\
        --out_dir ${prefix}

    gzip $args2 ${prefix}/cluster_*/*/*.fasta

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trycycler: \$(trycycler --version | sed 's/Trycycler v//')
    END_VERSIONS
    """

    stub:
    // Creates a minimal stand-in for the real output layout: one cluster with a
    // single gzipped contig, plus empty newick/phylip files.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    mkdir -p ${prefix}/cluster_001/1_contigs
    echo "" | gzip > ${prefix}/cluster_001/1_contigs/A_contig_2a.fasta.gz
    touch ${prefix}/contigs.newick
    touch ${prefix}/contigs.phylip

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trycycler: \$(trycycler --version | sed 's/Trycycler v//')
    END_VERSIONS
    """
}
55 changes: 55 additions & 0 deletions modules/nf-core/trycycler/cluster/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for trycycler/cluster. The input and output entries below
# mirror the channel elements declared in main.nf, in the same order.
name: "trycycler_cluster"
description: Cluster contigs from multiple assemblies by similarity
keywords:
  - cluster
  - alignment
  - fastq
  - fasta
  - genomics
tools:
  - "trycycler":
      description: Trycycler is a tool for generating consensus long-read assemblies for bacterial genomes
      homepage: https://github.com/rrwick/Trycycler
      documentation: https://github.com/rrwick/Trycycler/wiki
      doi: "10.1186/s13059-021-02483-z"
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - contigs:
      type: file
      description: Input assemblies whose contigs will be clustered (one or more files, passed to `--assemblies`)
  - reads:
      type: file
      description: Long-read FASTQ file, optionally gzip compressed (passed to `--reads`)

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - cluster_dir:
      type: directory
      description: Output directory containing clustering results
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@watsonar"
maintainers:
  - "@watsonar"
77 changes: 77 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// nf-test specification for TRYCYCLER_CLUSTER.
//
// Both tests write their own tiny inputs (two assemblies with two contigs each,
// plus a two-record FASTQ) instead of using shared test data, then snapshot
// every output channel of the process.
nextflow_process {

    name "Test Process TRYCYCLER_CLUSTER"
    script "../main.nf"
    process "TRYCYCLER_CLUSTER"
    config './nextflow.config'

    tag "modules"
    tag "modules_nfcore"
    tag "trycycler"
    tag "trycycler/cluster"

    test("custom test data") {

        when {
            process {
                // `\\n` is written doubled because this block is itself a string:
                // it becomes `\n` in the evaluated code and a newline in the file.
                // Each FASTQ record is four lines: @header, sequence, '+', qualities.
                """
                def contigs1 = file("contigs1.fasta")
                contigs1.text = '>contig_1a\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGATCACGACTACAGCACAGCACTACAGCATCAGCACAGCAGTCAGCGA\\n>contig_2a\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTATCGCAGCTACGATCAGCATCGATCTAGCAGAGCCTTCTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'
                def contigs2 = file("contigs2.fasta")
                contigs2.text = '>contig_1b\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACCCAGCTAGCAGCTACGATCACGTCTACAGCACAGCACTACAGCATCAGCACAGCAGTA\\n>contig_2b\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGCAGCTACGATCAGCATCCATCTAGCAGAGCCAACTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'
                def reads = file("reads.fastq")
                reads.text = '@NC-000913.3_646515_unaligned_0_F_0_1238_0\\nCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGA\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n@NC-000913.3_646518_unaligned_0_F_0_1238_0\\nTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGC\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n'
                input[0] = [
                    [ id:'test' ], // meta map
                    [ file('contigs1.fasta', checkIfExists: true), file('contigs2.fasta', checkIfExists: true) ], // contigs
                    file('reads.fastq', checkIfExists: true) // reads
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("custom test data - stub") {

        when {
            process {
                // Same inputs as the real run, but only the stub block is executed.
                options "-stub-run"
                """
                def contigs1 = file("contigs1.fasta")
                contigs1.text = '>contig_1a\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGATCACGACTACAGCACAGCACTACAGCATCAGCACAGCAGTCAGCGA\\n>contig_2a\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTATCGCAGCTACGATCAGCATCGATCTAGCAGAGCCTTCTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'
                def contigs2 = file("contigs2.fasta")
                contigs2.text = '>contig_1b\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACCCAGCTAGCAGCTACGATCACGTCTACAGCACAGCACTACAGCATCAGCACAGCAGTA\\n>contig_2b\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGCAGCTACGATCAGCATCCATCTAGCAGAGCCAACTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'
                def reads = file("reads.fastq")
                reads.text = '@NC-000913.3_646515_unaligned_0_F_0_1238_0\\nCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGA\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n@NC-000913.3_646518_unaligned_0_F_0_1238_0\\nTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGC\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n'
                input[0] = [
                    [ id:'test' ], // meta map
                    [ file('contigs1.fasta', checkIfExists: true), file('contigs2.fasta', checkIfExists: true) ], // contigs
                    file('reads.fastq', checkIfExists: true) // reads
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }
}
142 changes: 142 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"custom test data - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"contigs.newick:md5,d41d8cd98f00b204e9800998ecf8427e",
"contigs.phylip:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"1": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
],
"cluster_dir": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"contigs.newick:md5,d41d8cd98f00b204e9800998ecf8427e",
"contigs.phylip:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"versions": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-22T14:59:36.502272455"
},
"custom test data": {
"content": [
{
"0": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,2c579f9cc210a544772daa7f4a16d0d5"
]
],
[
[
"B_contig_2b.fasta.gz:md5,4b4819e6b8f996d387216450b314798e"
]
],
[
[
"A_contig_1a.fasta.gz:md5,6d49552461c330871cecac80635c1798"
]
],
[
[
"B_contig_1b.fasta.gz:md5,f012c88fa34c176735737485eb730d71"
]
],
"contigs.newick:md5,1793ccb2bff4a79525a1ce60fb90f8ab",
"contigs.phylip:md5,984639efc8ae4b4cbe6e53bdb0b8b8c1"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"1": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
],
"cluster_dir": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,2c579f9cc210a544772daa7f4a16d0d5"
]
],
[
[
"B_contig_2b.fasta.gz:md5,4b4819e6b8f996d387216450b314798e"
]
],
[
[
"A_contig_1a.fasta.gz:md5,6d49552461c330871cecac80635c1798"
]
],
[
[
"B_contig_1b.fasta.gz:md5,f012c88fa34c176735737485eb730d71"
]
],
"contigs.newick:md5,1793ccb2bff4a79525a1ce60fb90f8ab",
"contigs.phylip:md5,984639efc8ae4b4cbe6e53bdb0b8b8c1"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"versions": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-22T14:59:28.088111496"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Configuration applied only when running the module's nf-test suite
// (referenced from main.nf.test via `config './nextflow.config'`).
process {
    withName: 'TRYCYCLER_CLUSTER' {
        // Sets both trycycler contig filters to 0 for the test run.
        // NOTE(review): presumably needed because the test contigs and reads are
        // tiny and would be dropped by the tool's default thresholds - confirm.
        ext.args = "--min_contig_len 0 --min_contig_depth 0"
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Path glob associated with the trycycler/cluster test tag.
trycycler/cluster:
  - modules/nf-core/trycycler/cluster/**
9 changes: 9 additions & 0 deletions modules/nf-core/trycycler/subsample/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment definition for the trycycler/subsample module.
name: trycycler_subsample
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned trycycler release, installed from the bioconda channel.
  - bioconda::trycycler=0.5.3
Loading

0 comments on commit e788348

Please sign in to comment.