Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IDR module #908

Merged
merged 15 commits into from
Oct 30, 2021
Merged
78 changes: 78 additions & 0 deletions modules/idr/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Derive the software tool name from $task.process:
// take the last ':'-separated element, keep the part before the first '_', and lower-case it
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_name    = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}

//
// Derive the module (process) name from $task.process:
// the last ':'-separated element, returned unchanged
//
def getProcessName(task_process) {
    def name_parts = task_process.tokenize(':')
    return name_parts[-1]
}

//
// Build the Groovy Map of options understood by nf-core modules, filling in defaults
//
// Every key falls back to an empty string/list when unset or falsy, except 'publish_files',
// which is copied through as-is (so it stays null when not supplied).
//
def initOptions(Map args) {
    return [
        args           : args.args            ?: '',
        args2          : args.args2           ?: '',
        args3          : args.args3           ?: '',
        publish_by_meta: args.publish_by_meta ?: [],
        publish_dir    : args.publish_dir     ?: '',
        publish_files  : args.publish_files,
        suffix         : args.suffix          ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string
//
// Null, empty and whitespace-only entries are dropped. Each remaining entry is trimmed and
// stripped of leading and trailing slashes before the entries are joined with '/'.
//
def getPathFromList(path_list) {
    // Groovy truth treats both null and '' as false. The previous '!item?.trim().isEmpty()'
    // threw a NullPointerException on null entries because '?.' does not short-circuit the chain.
    def paths = path_list.findAll { item -> item?.trim() }
    paths     = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}

//
// Work out the relative publish path for a module result file
//
// Returns null when the file should not be published: versions.yml outside the pytest
// workflow, a 'publish_files' map with no matching suffix, or a 'publish_files' value
// that is neither a Map nor null.
//
def saveFiles(Map args) {
    def opts     = initOptions(args.options)
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // Do not publish versions.yml unless the NF_CORE_MODULES_TEST environment variable is set
    def is_versions_file = args.filename.equals('versions.yml')
    if (is_versions_file && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // Optionally add one path segment per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (meta_key in meta_keys) {
            // Only String keys count, and only when a non-empty meta map was supplied
            if (!args.meta || !(meta_key instanceof String)) {
                continue
            }
            // Fall back to the key itself when the meta map has no such entry
            def segment = meta_key
            if (args.meta.containsKey(meta_key)) {
                def meta_value = args.meta[meta_key]
                segment = meta_value instanceof Boolean ? "${meta_key}_${meta_value}".toString() : meta_value
            }
            // Non-String values contribute an empty segment, which is dropped when the path is joined
            segments.add(segment instanceof String ? segment : '')
        }
    }

    def publish_files = opts.publish_files
    if (publish_files == null) {
        // No filter configured: publish every file
        return "${getPathFromList(segments)}/$args.filename"
    }
    if (publish_files instanceof Map) {
        // First suffix (in map order) that the filename ends with selects the sub-directory
        def matched = publish_files.find { entry -> args.filename.endsWith(entry.key) }
        if (matched != null) {
            return "${getPathFromList(segments + [matched.value])}/$args.filename"
        }
    }
    return null
}
56 changes: 56 additions & 0 deletions modules/idr/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'

// Default module options to an empty map; an including script may supply its own via addParams
params.options = [:]
// Options with defaults filled in by initOptions; read as $options.args in the IDR script block
options = initOptions(params.options)

//
// Run idr on a set of replicate peak files
//
// Inputs:
//   peaks     - replicate peak files passed to 'idr --samples'
//   peak_type - value passed to 'idr --input-file-type' (checked against narrowPeak, broadPeak, bed)
//   prefix    - optional prefix for the output file names; when empty the files are named
//               'idrValues.txt' and 'log.txt'
// Outputs: the IDR values file, the idr log file, the PNG plot(s) and versions.yml
//
process IDR {
    tag "$prefix"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:[:], publish_by_meta:[]) }

    conda (params.enable_conda ? "bioconda::idr=2.0.4.2" : null)
    // NOTE(review): the Singularity and Docker images use different build strings
    // (py39hcbe4a3b_5 vs py38h9af456f_5) - confirm this is intended
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/idr:2.0.4.2--py39hcbe4a3b_5"
    } else {
        container "quay.io/biocontainers/idr:2.0.4.2--py38h9af456f_5"
    }

    input:
    path peaks
    val  peak_type
    val  prefix

    output:
    path "*idrValues.txt", emit: idr
    path "*log.txt"      , emit: log
    path "*.png"         , emit: png
    path "versions.yml"  , emit: versions

    script:
    // Use the size() method: property-style '.size' is not a public List property
    // and should not be relied on to return the element count.
    // Validation problems are only logged; the command below is still rendered.
    if (peaks.toList().size() < 2) {
        log.error "[ERROR] idr needs at least two replicates only one provided."
    }
    def peak_types = ['narrowPeak', 'broadPeak', 'bed']
    if (!peak_types.contains(peak_type)) {
        log.error "[ERROR] Invalid option: '${peak_type}'. Valid options for 'peak_type': ${peak_types.join(', ')}."
    }
    // An empty prefix yields un-prefixed output names
    def idr_vals = prefix ? "${prefix}.idrValues.txt" : "idrValues.txt"
    def log_file = prefix ? "${prefix}.log.txt" : "log.txt"
    """
    idr \\
        --samples $peaks \\
        --input-file-type $peak_type \\
        --output-file $idr_vals \\
        --log-output-file $log_file \\
        --plot \\
        $options.args

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$(echo \$(idr --version 2>&1) | sed 's/^.*IDR //; s/ .*\$//')
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/idr/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
name: idr
description: |
  Measures reproducibility of ChIP-seq, ATAC-seq peaks using IDR (Irreproducible
  Discovery Rate)
keywords:
  - IDR
  - peaks
  - ChIP-seq
  - ATAC-seq
tools:
  - idr:
      description: |
        The IDR (Irreproducible Discovery Rate) framework is a unified approach
        to measure the reproducibility of findings identified from replicate
        experiments and provide highly stable thresholds based on reproducibility.
      homepage: None
      documentation: None
      tool_dev_url: https://github.com/kundajelab/idr
      doi: ""
      licence: ['GPL v2']
input:
  - peaks:
      type: tuple of two files
      description: BED, narrowPeak or broadPeak files of replicates
      pattern: "*"
  - peak_type:
      type: value
      description: Type of peak file
      pattern: "{narrowPeak,broadPeak,bed}"
  - prefix:
      type: value
      description: Prefix for output files
# Output patterns mirror the globs declared in the 'output:' section of main.nf
output:
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - idr:
      type: file
      description: Text file containing IDR values
      pattern: "*idrValues.txt"
  - log:
      type: file
      description: Log file
      pattern: "*log.txt"
  - png:
      type: file
      description: Plot generated by idr
      pattern: "*.png"

authors:
  - "@drpatelh"
  - "@joseespinosa"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,10 @@ homer/makeucscfile:
- modules/homer/makeucscfile/**
- tests/modules/homer/makeucscfile/**

idr:
- modules/idr/**
- tests/modules/idr/**

iqtree:
- modules/iqtree/**
- tests/modules/iqtree/**
Expand Down
7 changes: 7 additions & 0 deletions tests/config/test_data.config
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,13 @@ params {
test2_genome_vcf_gz_tbi = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.gz.tbi"
test2_genome_vcf_idx = "${test_data_dir}/genomics/homo_sapiens/illumina/gvcf/test2.genome.vcf.idx"

test_broad_peaks = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test.broadPeak"
test2_broad_peaks = "${test_data_dir}/genomics/homo_sapiens/illumina/broadpeak/test2.broadPeak"
JoseEspinosa marked this conversation as resolved.
Show resolved Hide resolved

test_narrow_peaks = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test.narrowPeak"
test2_narrow_peaks = "${test_data_dir}/genomics/homo_sapiens/illumina/narrowpeak/test2.narrowPeak"
JoseEspinosa marked this conversation as resolved.
Show resolved Hide resolved


JoseEspinosa marked this conversation as resolved.
Show resolved Hide resolved
test_10x_1_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_1.fastq.gz"
test_10x_2_fastq_gz = "${test_data_dir}/genomics/homo_sapiens/illumina/10xgenomics/test.10x_2.fastq.gz"

Expand Down
57 changes: 57 additions & 0 deletions tests/modules/idr/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

// Module under test, included with empty options.
// The leftover debugging statements that downloaded an external BED file and copied it
// to 'peak_test' were removed: no workflow in this script used the file, and they made
// every run depend on a third-party server.
include { IDR } from '../../../modules/idr/main.nf' addParams( options: [:] )

// Runs IDR on the two narrowPeak test replicates with output prefix 'test'
workflow test_idr_narrowpeak {

    def illumina   = params.test_data['homo_sapiens']['illumina']
    def replicates = [
        file(illumina['test_narrow_peaks'], checkIfExists: true),
        file(illumina['test2_narrow_peaks'], checkIfExists: true)
    ]

    IDR ( replicates, 'narrowPeak', 'test' )
}

// Runs IDR on the two broadPeak test replicates with output prefix 'test'
workflow test_idr_broadpeak {

    def illumina   = params.test_data['homo_sapiens']['illumina']
    def replicates = [
        file(illumina['test_broad_peaks'], checkIfExists: true),
        file(illumina['test2_broad_peaks'], checkIfExists: true)
    ]

    IDR ( replicates, 'broadPeak', 'test' )
}

// Runs IDR on the two narrowPeak test replicates with an empty output prefix
workflow test_idr_noprefix {

    def illumina   = params.test_data['homo_sapiens']['illumina']
    def replicates = [
        file(illumina['test_narrow_peaks'], checkIfExists: true),
        file(illumina['test2_narrow_peaks'], checkIfExists: true)
    ]

    IDR ( replicates, 'narrowPeak', '' )
}

// workflow test_idr_bed {

// // Also tried with these two replicates in bed format from this course
// // (http://jvanheld.github.io/cisreg_course/chip-seq/practical/annotation.html) and did not work
// // http://pedagogix-tagc.univ-mrs.fr/courses/data/ngs/td_chip_seq/all/siGATA_ER_E2_r1_SRX176857_peaks.bed
// // http://pedagogix-tagc.univ-mrs.fr/courses/data/ngs/td_chip_seq/all/siGATA_ER_E2_r2_SRX176859_peaks.bed

// input = [
// //Does not work with the data in test-datasets
// // file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true),
// // file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true)
// // Also tried to see whether it accepts the same files as in narrow and broad mode, but it does not
// // file('https://raw.githubusercontent.com/kundajelab/idr/master/tests/data/peak1', checkIfExists: true),
// // file('https://raw.githubusercontent.com/kundajelab/idr/master/tests/data/peak2', checkIfExists: true)
// ]

// IDR ( input, 'bed', 'test' )
// }
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// workflow test_idr_bed {
// // Also tried with these two replicates in bed format from this course
// // (http://jvanheld.github.io/cisreg_course/chip-seq/practical/annotation.html) and did not work
// // http://pedagogix-tagc.univ-mrs.fr/courses/data/ngs/td_chip_seq/all/siGATA_ER_E2_r1_SRX176857_peaks.bed
// // http://pedagogix-tagc.univ-mrs.fr/courses/data/ngs/td_chip_seq/all/siGATA_ER_E2_r2_SRX176859_peaks.bed
// input = [
// //Does not work with the data in test-datasets
// // file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true),
// // file(params.test_data['sarscov2']['genome']['test2_bed'], checkIfExists: true)
// // Also tried to see weather it swallow the same file as narrow and broad mode, but it does not
// // file('https://raw.githubusercontent.com/kundajelab/idr/master/tests/data/peak1', checkIfExists: true),
// // file('https://raw.githubusercontent.com/kundajelab/idr/master/tests/data/peak2', checkIfExists: true)
// ]
// IDR ( input, 'bed', 'test' )
// }

35 changes: 35 additions & 0 deletions tests/modules/idr/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Each entry runs one entry workflow from tests/modules/idr and lists the files
# expected under output/idr together with their md5 checksums.

# narrowPeak replicates, output prefix 'test'
- name: idr test_idr_narrowpeak
command: nextflow run tests/modules/idr -entry test_idr_narrowpeak -c tests/config/nextflow.config
tags:
- idr
files:
- path: output/idr/test.idrValues.txt
md5sum: 09be837cc6abbc3eb5958b74802eea55
- path: output/idr/test.idrValues.txt.png
md5sum: 4a7143ccc0ccadb37c2317bf626e6d96
- path: output/idr/test.log.txt
md5sum: 6443507ac66b9d3b64bc56b78328083e

# broadPeak replicates, output prefix 'test'
- name: idr test_idr_broadpeak
command: nextflow run tests/modules/idr -entry test_idr_broadpeak -c tests/config/nextflow.config
tags:
- idr
files:
- path: output/idr/test.idrValues.txt
md5sum: 387441c716815e4caec3e70a2cc11a4a
- path: output/idr/test.idrValues.txt.png
md5sum: 7204083ca5b920b4215a5991c12cb4e7
- path: output/idr/test.log.txt
md5sum: e6917133112b5cec135c182ffac19237

# narrowPeak replicates with an empty prefix: file names lose the 'test.' prefix,
# and the expected checksums are the same as in the narrowpeak entry
- name: idr test_idr_noprefix
command: nextflow run tests/modules/idr -entry test_idr_noprefix -c tests/config/nextflow.config
tags:
- idr
files:
- path: output/idr/idrValues.txt
md5sum: 09be837cc6abbc3eb5958b74802eea55
- path: output/idr/idrValues.txt.png
md5sum: 4a7143ccc0ccadb37c2317bf626e6d96
- path: output/idr/log.txt
md5sum: 6443507ac66b9d3b64bc56b78328083e