Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions modules/nf-core/trident/fetch/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the TRIDENT_FETCH process; main.nf loads it via
# `conda "${moduleDir}/environment.yml"`.
channels:
- conda-forge
- bioconda
dependencies:
# Pinned to the same tool version (1.6.7.1) as the container images in main.nf;
# bump both together.
- "bioconda::poseidon-trident=1.6.7.1"
70 changes: 70 additions & 0 deletions modules/nf-core/trident/fetch/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
// Runs `trident fetch` to download Poseidon packages into `output_archive/`,
// optionally checking previously downloaded archive directories first.
process TRIDENT_FETCH {
    // Label each task with what is being fetched: the fetchString when one is
    // given, otherwise the fetchFile. Falls back to an empty tag when neither
    // input is set, which matches the previous behaviour.
    tag "${fetch_s ?: fetch_fn ?: ''}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we somehow infer the tag from the input values?

label 'process_single'

// Conda environment definition shipped next to this module.
conda "${moduleDir}/environment.yml"
// Container selection: the Galaxy depot Singularity image is used when the
// container engine is Singularity and `task.ext.singularity_pull_docker_container`
// is not set; otherwise the biocontainers Docker image is used. Both are pinned
// to the same build (1.6.7.1--hebebf5b_1).
container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container
? 'https://depot.galaxyproject.org/singularity/poseidon-trident:1.6.7.1--hebebf5b_1'
: 'biocontainers/poseidon-trident:1.6.7.1--hebebf5b_1'}"

input:
// archive_dir: previously downloaded archive directory/directories, each passed to trident as an extra `-d` (may be empty)
// fetch_s:     value for `--fetchString`; the option is omitted when this is empty
// fetch_fn:    file for `--fetchFile`; the option is omitted when this is empty
tuple path(archive_dir), val(fetch_s), path(fetch_fn)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure that a meta map will never be useful here?


output:
// All package outputs are optional: fetch checks whether the package already exists in the
// provided archive directories and skips the download if so.
// Every glob is anchored at output_archive/<package>/, the download directory used by the script.
path "output_archive/*/POSEIDON.yml", emit: poseidon_yml, optional: true
path "output_archive/*/*.{bed,geno,vcf,bed.gz,geno.gz,vcf.gz}", emit: geno, optional: true
path "output_archive/*/*.{bim,snp,bim.gz,snp.gz}", emit: snp, optional: true
path "output_archive/*/*.{fam,ind,fam.gz,ind.gz}", emit: ind, optional: true
path "output_archive/*/*.janno", emit: janno, optional: true
path "output_archive/*/*.ssf", emit: ssf, optional: true
path "output_archive/*/*.bib", emit: bib, optional: true
path "output_archive/*/CHANGELOG.md", emit: changelog, optional: true
path "output_archive/*/README.md", emit: readme, optional: true
// versions.yml is the only output that is not optional.
path "versions.yml", emit: versions

when:
// Run the task unless `task.ext.when` is defined and evaluates to false.
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// Only pass --fetchString / --fetchFile when a value was actually supplied.
def fetch_string = fetch_s ? "--fetchString ${fetch_s}" : ''
def fetch_file = fetch_fn ? "--fetchFile ${fetch_fn}" : ''
// fetch will always download to the first directory provided in `-d` (output_archive/),
// but checks all provided dirs for already downloaded packages.
// `archive_dir` may be a single staged path or a list of paths. Wrap a single path in a
// list so that `-d` is prefixed once per directory, instead of calling `join` on the path
// object itself. Empty/null entries are dropped, so no archive dirs yields an empty string.
def archive_dirs = archive_dir instanceof Collection ? archive_dir : [archive_dir]
def archives = archive_dirs.findAll { it }.collect { "-d ${it}" }.join(' ')
// Note the explicit space before each line continuation, so the command does not
// depend on the indentation of the following line to separate its arguments.
"""
trident fetch \\
    -d output_archive/ \\
    ${archives} \\
    ${args} \\
    ${fetch_string} \\
    ${fetch_file}

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    trident: \$(trident --version)
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def fetch_string = fetch_s ? "--fetchString ${fetch_s}" : ''
def fetch_file = fetch_fn ? "--fetchFile ${fetch_fn}" : ''
// Build the `-d` flags the same way as the script section (one `-d` per directory,
// accepting either a single path or a list of paths).
def archive_dirs = archive_dir instanceof Collection ? archive_dir : [archive_dir]
def archives = archive_dirs.findAll { it }.collect { "-d ${it}" }.join(' ')
// The dummy package must live under output_archive/, because every declared output
// glob is `output_archive/*/...`; files created elsewhere are never emitted.
// Existing stub snapshots will need regenerating after this change.
"""
echo ${archives} ${fetch_string} ${fetch_file} ${args}

mkdir -p output_archive/dummy_package_dir
touch output_archive/dummy_package_dir/POSEIDON.yml
touch output_archive/dummy_package_dir/dummy_package.geno
touch output_archive/dummy_package_dir/dummy_package.snp
touch output_archive/dummy_package_dir/dummy_package.ind
touch output_archive/dummy_package_dir/dummy_package.janno

cat <<-END_VERSIONS > versions.yml
"${task.process}":
    trident: \$(trident --version)
END_VERSIONS
"""
}
101 changes: 101 additions & 0 deletions modules/nf-core/trident/fetch/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for the TRIDENT_FETCH process defined in main.nf.
name: "trident_fetch"
description: Download data from a remote Poseidon repository
keywords:
- poseidon-trident
- poseidon
- trident
- fetch
# The tool wrapped by this module.
tools:
- "trident":
description: "A tool (trident) to work with modular genotype databases formatted
using Poseidon."
homepage: "https://www.poseidon-adna.org/"
documentation: "https://www.poseidon-adna.org/#/trident?id=fetch-command"
tool_dev_url: "https://github.com/poseidon-framework/poseidon-hs/"
doi: "10.7554/eLife.98317.1"
licence: ["MIT"]

# Single input tuple, in the same order as main.nf:
# `tuple path(archive_dir), val(fetch_s), path(fetch_fn)`.
# main.nf only adds `--fetchString` / `--fetchFile` for the elements that are non-empty.
input:
- - archive_dir:
type: directory
description: |
The path to the directories containing any previously downloaded Poseidon archive files.
If this is provided, trident fetch will check these directories, and only download missing packages from the Poseidon-Framework server.
- fetch_s:
type: string
description: The fetchString provided to trident fetch (`--fetchString`), if any.
- fetch_fn:
type: file
description: A file containing a list of fetchStrings provided to trident fetch (`--fetchFile`), if any.
pattern: "*.txt"

# Every channel except `versions` is declared `optional: true` in main.nf, because
# fetch skips the download when the package already exists in a provided archive directory.
output:
poseidon_yml:
- output_archive/*/POSEIDON.yml:
type: file
description: The POSEIDON.yml file of any newly downloaded package(s).
pattern: "output_archive/*/POSEIDON.yml"
ontologies: []
geno:
- "output_archive/*/*.{geno,bed,vcf,geno.gz,bed.gz,vcf.gz}":
type: file
description: The genotype files of any newly downloaded package(s).
pattern: "output_archive/*/*.{geno,bed,vcf,geno.gz,bed.gz,vcf.gz}"
ontologies: []
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some outputs (at least this one) should have some suitable ontology. Can you please check?

# The package outputs below (snp … readme) are optional in main.nf and are only
# populated for newly downloaded packages.
snp:
- "output_archive/*/*.{snp,bim,snp.gz,bim.gz}":
type: file
description: The SNP information files of any newly downloaded package(s).
pattern: "output_archive/*/*.{snp,bim,snp.gz,bim.gz}"
ontologies: []
ind:
- "output_archive/*/*.{ind,fam,ind.gz,fam.gz}":
type: file
description: The individual information files of any newly downloaded package(s).
pattern: "output_archive/*/*.{ind,fam,ind.gz,fam.gz}"
ontologies: []
janno:
- output_archive/*/*.janno:
type: file
description: The .janno annotation files of any newly downloaded
package(s).
pattern: "output_archive/*/*.janno"
ontologies: []
ssf:
- output_archive/*/*.ssf:
type: file
description: The .ssf files of any newly downloaded package(s).
pattern: "output_archive/*/*.ssf"
ontologies: []
bib:
- output_archive/*/*.bib:
type: file
description: The .bib bibliography files of any newly downloaded
package(s).
pattern: "output_archive/*/*.bib"
ontologies: []
changelog:
- output_archive/*/CHANGELOG.md:
type: file
description: The CHANGELOG.md files of any newly downloaded package(s).
pattern: "output_archive/*/CHANGELOG.md"
ontologies: []
readme:
- output_archive/*/README.md:
type: file
description: The README.md files of any newly downloaded package(s).
pattern: "output_archive/*/README.md"
ontologies: []
# versions.yml is the only output that main.nf does not mark as optional.
versions:
- versions.yml:
type: file
description: File containing software versions
pattern: "versions.yml"
ontologies:
- edam: "http://edamontology.org/format_3750" # YAML

authors:
- "@TCLamnidis"
maintainers:
- "@TCLamnidis"
104 changes: 104 additions & 0 deletions modules/nf-core/trident/fetch/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// nf-core modules test trident/fetch
// nf-test suite for the TRIDENT_FETCH process defined in ../main.nf.
nextflow_process {

name "Test Process TRIDENT_FETCH"
script "../main.nf"
process "TRIDENT_FETCH"

// Tags used to select this suite when running tests.
tag "modules"
tag "modules_nfcore"
tag "trident"
tag "trident/fetch"

test("2012_MeyerScience-2.1.1 - fetchString") {
// Fresh download: no archive directory and no fetchFile are supplied, only a fetchString.
// This test uses the smallest poseidon package in the archives (22M). It should be quick to download and validate.

when {
process {
"""
input[0] = [
[], // archive_dir
'*2012_MeyerScience-2.1.1*', // fetchString
[], // fetchFile
]
"""
}
}

then {
// The process must succeed and its outputs must match the stored snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

test("Existing package: 2012_MeyerScience-2.1.1 - fetchString") {
// Exercises the "package already exists" path: TRIDENT_FETCH is first run in `setup`
// (aliased PREFETCH), and its download location is then passed back in as archive_dir.
// Uses the same small package (22M) as the fresh-download test.

setup {
run("TRIDENT_FETCH", alias: "PREFETCH") {
script "../main.nf"
process {
"""
input[0] = Channel.fromList([
tuple(
[], // archive_dir
'*2012_MeyerScience-2.1.1*', // fetchString
[], // fetchFile
)
])
"""
}
}
}

// archive_dir is derived from the emitted POSEIDON.yml path by walking three levels up,
// i.e. from output_archive/<package>/POSEIDON.yml to the directory containing output_archive/.
// NOTE(review): the fetchString here carries embedded single quotes, unlike the
// fresh-download test — confirm this difference is intentional.
when {
process {
"""
input[0] = PREFETCH.out.poseidon_yml.map{
x ->
def archive_dir = x.parent.parent.parent
def fetchString = "'*2012_MeyerScience-2.1.1*'"
tuple(archive_dir, fetchString, [])
}
"""
}
}

then {
// The process must succeed and its outputs must match the stored snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

test("2012_MeyerScience-2.1.1 - fetchString - stub") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A test for fetchFile is missing


// Run the process with the `-stub` option, so the `stub:` block is exercised instead of `script:`.
options "-stub"

when {
process {
"""
input[0] = [
[], // archive_dir
"2012_MeyerScience-2.1.1", // fetchString
[], // fetchFile
]
"""
}
}

then {
// The process must succeed and its outputs must match the stored snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading
Loading