Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trycycler cluster #5670

Merged
merged 5 commits into from
May 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions modules/nf-core/trycycler/cluster/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the trycycler/cluster module: pins Trycycler 0.5.3
# from the bioconda channel.
name: trycycler_cluster
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - 'bioconda::trycycler=0.5.3'
56 changes: 56 additions & 0 deletions modules/nf-core/trycycler/cluster/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Runs `trycycler cluster` on a set of assemblies plus the reads they were
// built from, then gzips the per-cluster contig FASTA files it wrote.
//
// Inputs  : tuple of
//             meta    - sample map; `meta.id` is used as the task tag and as
//                       the default output prefix
//             contigs - assembly file(s), passed to `--assemblies`
//             reads   - read file, passed to `--reads`
// Outputs : cluster_dir - tuple of meta and everything matched by `*` in the
//                         task work directory
//           versions    - versions.yml recording the Trycycler version
// Options : task.ext.args   -> extra arguments for `trycycler cluster`
//           task.ext.args2  -> extra arguments for `gzip`
//           task.ext.prefix -> name of the output directory (default: meta.id)
process TRYCYCLER_CLUSTER {
    tag "$meta.id"
    label 'process_medium'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/trycycler:0.5.3--pyhdfd78af_0':
        'biocontainers/trycycler:0.5.3--pyhdfd78af_0' }"

    input:
    tuple val(meta), path(contigs), path(reads)

    output:
    // NOTE(review): the bare `*` glob also matches `versions.yml`, so that file
    // is emitted inside `cluster_dir` as well (visible in the test snapshot).
    // Narrowing the glob to the `${prefix}` directory would change the shape of
    // this channel for existing consumers, so it is left as-is here.
    tuple val(meta), path("*") , emit: cluster_dir
    path "versions.yml"        , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args   ?: ''
    def args2  = task.ext.args2  ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"

    // Step 1: cluster the contigs into `${prefix}/`.
    // Step 2: compress every `*.fasta` two levels below each `cluster_*` directory.
    // NOTE(review): gzip is run without `-n`, so the original file name and
    // timestamp are stored in each archive header - confirm whether checksum
    // stability of the .gz outputs matters before relying on their md5 sums.
    """
    trycycler \\
        cluster \\
        $args \\
        --assemblies ${contigs} \\
        --reads ${reads} \\
        --threads $task.cpus \\
        --out_dir ${prefix}

    gzip $args2 ${prefix}/cluster_*/*/*.fasta

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trycycler: \$(trycycler --version | sed 's/Trycycler v//')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Placeholder layout only: one gzipped FASTA under cluster_001/1_contigs
    // plus empty newick and phylip files; no clustering is performed.
    """
    mkdir -p ${prefix}/cluster_001/1_contigs
    echo "" | gzip > ${prefix}/cluster_001/1_contigs/A_contig_2a.fasta.gz
    touch ${prefix}/contigs.newick
    touch ${prefix}/contigs.phylip

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        trycycler: \$(trycycler --version | sed 's/Trycycler v//')
    END_VERSIONS
    """
}
55 changes: 55 additions & 0 deletions modules/nf-core/trycycler/cluster/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
---
# Module metadata for trycycler/cluster.
# The `input` entries mirror the process signature in main.nf:
#   tuple val(meta), path(contigs), path(reads)
name: "trycycler_cluster"
description: Cluster contigs from multiple assemblies by similarity
keywords:
  - cluster
  - alignment
  - fastq
  - fasta
  - genomics
tools:
  - "trycycler":
      description: Trycycler is a tool for generating consensus long-read assemblies for bacterial genomes
      homepage: https://github.com/rrwick/Trycycler
      documentation: https://github.com/rrwick/Trycycler/wiki
      doi: 10.1186/s13059-021-02483-z
      licence: ["GPL v3"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  - contigs:
      type: file
      description: |
        One or more input assemblies whose contigs will be clustered
        (passed to `trycycler cluster --assemblies`)

  - reads:
      type: file
      description: Long-read FASTQ file, optionally gzip compressed

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`

  - cluster_dir:
      type: directory
      description: |
        Output directory containing clustering results. The directory is
        named after `task.ext.prefix` (default: `meta.id`).

  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@watsonar"
maintainers:
  - "@watsonar"
77 changes: 77 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// nf-test spec for TRYCYCLER_CLUSTER.
// Both tests write two small two-contig assemblies and a two-record FASTQ file
// to disk, feed them to the process and compare all outputs to the snapshot.
// The second test repeats the run with `-stub-run`.
nextflow_process {

    name "Test Process TRYCYCLER_CLUSTER"
    script "../main.nf"
    process "TRYCYCLER_CLUSTER"
    config './nextflow.config'

    tag "modules"
    tag "modules_nfcore"
    tag "trycycler"
    tag "trycycler/cluster"

    test("custom test data") {

        when {
            process {
                """
                // Assembly A: contigs 1a and 2a
                def contigs1 = file("contigs1.fasta")
                contigs1.text = '>contig_1a\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGATCACGACTACAGCACAGCACTACAGCATCAGCACAGCAGTCAGCGA\\n>contig_2a\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTATCGCAGCTACGATCAGCATCGATCTAGCAGAGCCTTCTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'

                // Assembly B: contigs 1b and 2b
                def contigs2 = file("contigs2.fasta")
                contigs2.text = '>contig_1b\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACCCAGCTAGCAGCTACGATCACGTCTACAGCACAGCACTACAGCATCAGCACAGCAGTA\\n>contig_2b\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGCAGCTACGATCAGCATCCATCTAGCAGAGCCAACTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'

                // Two FASTQ records; each record header must start on its own line with '@'
                def reads = file("reads.fastq")
                reads.text = '@NC-000913.3_646515_unaligned_0_F_0_1238_0\\nCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGA\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n@NC-000913.3_646518_unaligned_0_F_0_1238_0\\nTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGC\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n'

                input[0] = [
                    [ id:'test' ], // meta map
                    [ file('contigs1.fasta', checkIfExists: true), file('contigs2.fasta', checkIfExists: true) ], // contigs
                    file('reads.fastq', checkIfExists: true) // reads
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

    test("custom test data - stub") {

        when {
            process {
                options "-stub-run"
                """
                // Assembly A: contigs 1a and 2a
                def contigs1 = file("contigs1.fasta")
                contigs1.text = '>contig_1a\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGATCACGACTACAGCACAGCACTACAGCATCAGCACAGCAGTCAGCGA\\n>contig_2a\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTATCGCAGCTACGATCAGCATCGATCTAGCAGAGCCTTCTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'

                // Assembly B: contigs 1b and 2b
                def contigs2 = file("contigs2.fasta")
                contigs2.text = '>contig_1b\\nATCCCCTTGGACTCCTAGCTAGGCTCTAGAGCCTTCTCTCGACTACCCAGCTAGCAGCTACGATCACGTCTACAGCACAGCACTACAGCATCAGCACAGCAGTA\\n>contig_2b\\nGCTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGCAGCTACGATCAGCATCCATCTAGCAGAGCCAACTGGTAACGCGACTCAGTCTCTACAGCACGGTAACCAGCACTACAGGGGTTAAGCCCATCA\\n'

                // Two FASTQ records; each record header must start on its own line with '@'
                def reads = file("reads.fastq")
                reads.text = '@NC-000913.3_646515_unaligned_0_F_0_1238_0\\nCTCTAGAGCCTTCTCTCGACTACGAGCTAGCAGCTACGA\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n@NC-000913.3_646518_unaligned_0_F_0_1238_0\\nTAGGCTCTAACGCGCCATCAGCCGCTCAGCTACGACTAGCGC\\n+\\nIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII\\n'

                input[0] = [
                    [ id:'test' ], // meta map
                    [ file('contigs1.fasta', checkIfExists: true), file('contigs2.fasta', checkIfExists: true) ], // contigs
                    file('reads.fastq', checkIfExists: true) // reads
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }
}
142 changes: 142 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
{
"custom test data - stub": {
"content": [
{
"0": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"contigs.newick:md5,d41d8cd98f00b204e9800998ecf8427e",
"contigs.phylip:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"1": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
],
"cluster_dir": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,68b329da9893e34099c7d8ad5cb9c940"
]
],
"contigs.newick:md5,d41d8cd98f00b204e9800998ecf8427e",
"contigs.phylip:md5,d41d8cd98f00b204e9800998ecf8427e"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"versions": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-22T14:59:36.502272455"
},
"custom test data": {
"content": [
{
"0": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,2c579f9cc210a544772daa7f4a16d0d5"
]
],
[
[
"B_contig_2b.fasta.gz:md5,4b4819e6b8f996d387216450b314798e"
]
],
[
[
"A_contig_1a.fasta.gz:md5,6d49552461c330871cecac80635c1798"
]
],
[
[
"B_contig_1b.fasta.gz:md5,f012c88fa34c176735737485eb730d71"
]
],
"contigs.newick:md5,1793ccb2bff4a79525a1ce60fb90f8ab",
"contigs.phylip:md5,984639efc8ae4b4cbe6e53bdb0b8b8c1"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"1": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
],
"cluster_dir": [
[
{
"id": "test"
},
[
[
[
[
"A_contig_2a.fasta.gz:md5,2c579f9cc210a544772daa7f4a16d0d5"
]
],
[
[
"B_contig_2b.fasta.gz:md5,4b4819e6b8f996d387216450b314798e"
]
],
[
[
"A_contig_1a.fasta.gz:md5,6d49552461c330871cecac80635c1798"
]
],
[
[
"B_contig_1b.fasta.gz:md5,f012c88fa34c176735737485eb730d71"
]
],
"contigs.newick:md5,1793ccb2bff4a79525a1ce60fb90f8ab",
"contigs.phylip:md5,984639efc8ae4b4cbe6e53bdb0b8b8c1"
],
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
]
],
"versions": [
"versions.yml:md5,616584ce01fb659316a99b1329e047aa"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-22T14:59:28.088111496"
}
}
5 changes: 5 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/nextflow.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Test-only configuration: supplies extra `trycycler cluster` arguments to
// TRYCYCLER_CLUSTER through `ext.args`.
process {
    withName: 'TRYCYCLER_CLUSTER' {
        // Both thresholds are set to 0 - presumably so the very short synthetic
        // test contigs are not filtered out (TODO confirm against Trycycler docs).
        ext.args = [
            '--min_contig_len 0',
            '--min_contig_depth 0'
        ].join(' ')
    }
}
2 changes: 2 additions & 0 deletions modules/nf-core/trycycler/cluster/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the `trycycler/cluster` tag to the path glob covering this module's files.
trycycler/cluster:
  - 'modules/nf-core/trycycler/cluster/**'
9 changes: 9 additions & 0 deletions modules/nf-core/trycycler/subsample/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the trycycler/subsample module: pins Trycycler 0.5.3
# from the bioconda channel.
name: trycycler_subsample
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  - 'bioconda::trycycler=0.5.3'
Loading
Loading