From e18a2de9c090b4e0f01b6cea1c1432070cd18253 Mon Sep 17 00:00:00 2001
From: Zehra Hazal Sezer <65447556+zehrahazalsezer@users.noreply.github.com>
Date: Wed, 18 Oct 2023 17:04:42 +0200
Subject: [PATCH] htseq/count module is implemented (#4085)

* htseq/count module is implemented

* Update meta.yml

bam.bai changed to .bai

* Added meta2

* Implemented changes from review

* update pytest runner

* Update modules/nf-core/htseq/count/main.nf

Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>

---------

Co-authored-by: Maxime U Garcia
Co-authored-by: Nicolas Vannieuwkerke <101190534+nvnieuwk@users.noreply.github.com>
Co-authored-by: mashehu
---
Review notes (this section is ignored when the patch is applied):

  * modules/nf-core/htseq/count/main.nf
      - script + stub: dropped the stray ")" that followed the version
        command substitution; inside the heredoc it was literal text and
        ended up in versions.yml.
      - stub: restored the missing "htseq" key so versions.yml is a valid
        "<tool>: <version>" mapping, identical in shape to the script block.
  * modules/nf-core/htseq/count/meta.yml
      - index: documented as BAM/CRAM index (*.bai / *.crai), matching the
        inputs the module and its CRAM test actually use.
      - txt: pattern is now the glob "*.txt", matching the process output.
  * tests/modules/nf-core/htseq/count/nextflow.config
      - docker.registry moved out of the process scope; `docker` is a
        top-level configuration scope.

  All fixes are in-place edits: the number of added lines per file, the hunk
  headers and the diffstat below are unchanged.

 modules/nf-core/htseq/count/main.nf           | 49 +++++++++++++++++
 modules/nf-core/htseq/count/meta.yml          | 53 +++++++++++++++++++
 tests/config/pytest_modules.yml               |  4 ++
 tests/modules/nf-core/htseq/count/main.nf     | 40 ++++++++++++++
 .../nf-core/htseq/count/nextflow.config       |  6 +++
 tests/modules/nf-core/htseq/count/test.yml    | 19 +++++++
 6 files changed, 171 insertions(+)
 create mode 100644 modules/nf-core/htseq/count/main.nf
 create mode 100644 modules/nf-core/htseq/count/meta.yml
 create mode 100644 tests/modules/nf-core/htseq/count/main.nf
 create mode 100644 tests/modules/nf-core/htseq/count/nextflow.config
 create mode 100644 tests/modules/nf-core/htseq/count/test.yml

diff --git a/modules/nf-core/htseq/count/main.nf b/modules/nf-core/htseq/count/main.nf
new file mode 100644
index 00000000000..1fc6510b2a7
--- /dev/null
+++ b/modules/nf-core/htseq/count/main.nf
@@ -0,0 +1,49 @@
+process HTSEQ_COUNT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::htseq=2.0.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/htseq:2.0.2--py310ha14a713_0':
+        'biocontainers/htseq:2.0.2--py310ha14a713_0' }"
+
+    input:
+    tuple val(meta), path(input), path(index)
+    tuple val(meta2), path(gtf)
+
+    output:
+    tuple val(meta), path("*.txt"), emit: txt
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    htseq-count \\
+        ${input} \\
+        ${gtf} \\
+        ${args} \\
+        > ${prefix}.txt
+
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        htseq: \$(echo \$(htseq-count --version ) | sed 's/^.*htseq-count //; s/Using.*\$//' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.txt
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        htseq: \$(echo \$(htseq-count --version ) | sed 's/^.*htseq-count //; s/Using.*\$//' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/htseq/count/meta.yml b/modules/nf-core/htseq/count/meta.yml
new file mode 100644
index 00000000000..87b88d73fd1
--- /dev/null
+++ b/modules/nf-core/htseq/count/meta.yml
@@ -0,0 +1,53 @@
+---
+name: "htseq_count"
+description: count how many reads map to each feature
+keywords:
+  - htseq
+  - count
+  - gtf
+  - annotation
+tools:
+  - "htseq/count":
+      description: "HTSeq is a Python library to facilitate processing and analysis of data from high-throughput sequencing (HTS) experiments."
+      homepage: "https://htseq.readthedocs.io/en/latest/"
+      documentation: "https://htseq.readthedocs.io/en/latest/index.html"
+      doi: "10.1093/bioinformatics/btu638"
+      licence: "['GPL v3']"
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - meta2:
+      type: map
+      description: |
+        .gtf file information
+        e.g. `[ id:'test' ]`
+  - input:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - index:
+      type: file
+      description: Index of the BAM/CRAM file
+      pattern: "*.{bai,crai}"
+  - gtf:
+      type: file
+      description: Contains the features in the GTF format
+      pattern: "*.gtf"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'test', single_end:false ]`
+  - txt:
+      type: file
+      description: File containing feature counts output
+      pattern: "*.txt"
+
+authors:
+  - "@zehrahazalsezer"
diff --git a/tests/config/pytest_modules.yml b/tests/config/pytest_modules.yml
index 99384ebe424..5a1f7454cb6 100644
--- a/tests/config/pytest_modules.yml
+++ b/tests/config/pytest_modules.yml
@@ -1916,6 +1916,10 @@ hpsuissero:
   - modules/nf-core/hpsuissero/**
   - tests/modules/nf-core/hpsuissero/**
 
+htseq/count:
+  - modules/nf-core/htseq/count/**
+  - tests/modules/nf-core/htseq/count/**
+
 hypo:
   - modules/nf-core/hypo/**
   - tests/modules/nf-core/hypo/**
diff --git a/tests/modules/nf-core/htseq/count/main.nf b/tests/modules/nf-core/htseq/count/main.nf
new file mode 100644
index 00000000000..9b653cbce29
--- /dev/null
+++ b/tests/modules/nf-core/htseq/count/main.nf
@@ -0,0 +1,40 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+include { HTSEQ_COUNT } from '../../../../../modules/nf-core/htseq/count/main.nf'
+
+workflow test_htseq_count_bam {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true)
+
+    ]
+    gtf = [
+        [ id:'test2'], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+    ]
+
+
+    HTSEQ_COUNT (input,gtf)
+}
+
+workflow test_htseq_count_cram {
+
+    input = [
+        [ id:'test', single_end:false ], // meta map
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true),
+        file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true)
+
+    ]
+    gtf = [
+        [ id:'test2'], // meta map
+        file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)
+    ]
+
+
+    HTSEQ_COUNT (input,gtf)
+}
+
diff --git a/tests/modules/nf-core/htseq/count/nextflow.config b/tests/modules/nf-core/htseq/count/nextflow.config
new file mode 100644
index 00000000000..153c686dc53
--- /dev/null
+++ b/tests/modules/nf-core/htseq/count/nextflow.config
@@ -0,0 +1,6 @@
+process {
+    publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
+}
+
+// `docker` is a top-level config scope; it has no effect when nested inside `process { }`.
+docker.registry = 'quay.io'
diff --git a/tests/modules/nf-core/htseq/count/test.yml b/tests/modules/nf-core/htseq/count/test.yml
new file mode 100644
index 00000000000..9d45ed672fb
--- /dev/null
+++ b/tests/modules/nf-core/htseq/count/test.yml
@@ -0,0 +1,19 @@
+- name: htseq count test_htseq_count_bam
+  command: nextflow run ./tests/modules/nf-core/htseq/count -entry test_htseq_count_bam -c ./tests/config/nextflow.config
+  tags:
+    - htseq
+    - htseq/count
+  files:
+    - path: output/htseq/test.txt
+      md5sum: 5e37b2292f1d21945acb532afa5eb615
+    - path: output/htseq/versions.yml
+
+- name: htseq count test_htseq_count_cram
+  command: nextflow run ./tests/modules/nf-core/htseq/count -entry test_htseq_count_cram -c ./tests/config/nextflow.config
+  tags:
+    - htseq
+    - htseq/count
+  files:
+    - path: output/htseq/test.txt
+      md5sum: 53f9b007bd72115e8b72a40f859bc896
+    - path: output/htseq/versions.yml