Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add filtlong #919

Merged
merged 2 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions modules/filtlong/functions.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
//
// Utility functions used in nf-core DSL2 module files
//

//
// Derive the lower-case software tool name from $task.process:
// keep the last ':'-separated scope, then the part before the first '_'
//
def getSoftwareName(task_process) {
    def process_name = task_process.tokenize(':')[-1]
    def tool_name    = process_name.tokenize('_')[0]
    return tool_name.toLowerCase()
}

//
// Derive the module (process) name from $task.process:
// the last ':'-separated component, with case preserved
//
def getProcessName(task_process) {
    def scopes = task_process.tokenize(':')
    return scopes[-1]
}

//
// Build the Groovy Map of options available to nf-core modules,
// substituting a default for every key that is missing or empty in `args`
//
def initOptions(Map args) {
    return [
        args            : args.args            ?: '',
        args2           : args.args2           ?: '',
        args3           : args.args3           ?: '',
        publish_by_meta : args.publish_by_meta ?: [],
        publish_dir     : args.publish_dir     ?: '',
        // Passed through without a default: saveFiles treats null differently from a Map
        publish_files   : args.publish_files,
        suffix          : args.suffix          ?: ''
    ]
}

//
// Tidy up and join elements of a list to return a path string.
// Null, empty and whitespace-only entries are dropped; each remaining entry is
// trimmed and stripped of leading/trailing slashes before being joined with '/'.
//
def getPathFromList(path_list) {
    // Groovy truth on `item?.trim()` is false for null, '' and blank strings, so null
    // entries are skipped instead of raising a NullPointerException on `.isEmpty()`
    def paths = path_list.findAll { item -> item?.trim() }
    paths = paths.collect { it.trim().replaceAll("^[/]+|[/]+\$", "") } // Trim whitespace and leading/trailing slashes
    return paths.join('/')
}

//
// Work out where a module result file should be published.
// Returns the relative publish path, or null when the file must not be published.
//
def saveFiles(Map args) {
    def opts     = initOptions(args.options)
    def segments = [ opts.publish_dir ?: args.publish_dir ]

    // versions.yml is only published when running from the pytest workflow
    if (args.filename.equals('versions.yml') && !System.getenv("NF_CORE_MODULES_TEST")) {
        return null
    }

    // Optionally append one sub-directory per requested meta key
    if (opts.publish_by_meta) {
        def meta_keys = opts.publish_by_meta instanceof List ? opts.publish_by_meta : args.publish_by_meta
        for (key in meta_keys) {
            if (!(args.meta && key instanceof String)) {
                continue
            }
            // Fall back to the key itself when the meta map has no such entry
            def segment = args.meta.containsKey(key) ? args.meta[key] : key
            if (segment instanceof Boolean) {
                segment = "${key}_${segment}".toString()
            }
            // Anything that is not a plain String contributes an empty segment
            segments.add(segment instanceof String ? segment : '')
        }
    }

    // No publish_files filter: every file is published under the collected path
    if (opts.publish_files == null) {
        return "${getPathFromList(segments)}/${args.filename}"
    }

    // publish_files Map: the first suffix that matches the filename selects the sub-directory
    if (opts.publish_files instanceof Map) {
        for (rule in opts.publish_files) {
            if (args.filename.endsWith(rule.key)) {
                def target = segments + [ rule.value ]
                return "${getPathFromList(target)}/${args.filename}"
            }
        }
    }

    // No rule matched, or publish_files is neither null nor a Map
    return null
}
43 changes: 43 additions & 0 deletions modules/filtlong/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Import generic module functions
include { initOptions; saveFiles; getSoftwareName; getProcessName } from './functions'

// Module options default to an empty map; an including workflow can supply its own (e.g. via addParams)
params.options = [:]
// Fill in defaults for every option key that was not supplied
options = initOptions(params.options)

//
// Filter long reads with Filtlong and write the result as a gzipped fastq.
// Short reads are optional; when supplied they are passed to Filtlong via -1 / -2.
//
process FILTLONG {
    tag "$meta.id"
    label 'process_low'
    publishDir "${params.outdir}",
        mode: params.publish_dir_mode,
        saveAs: { filename -> saveFiles(filename:filename, options:params.options, publish_dir:getSoftwareName(task.process), meta:meta, publish_by_meta:['id']) }

    conda (params.enable_conda ? "bioconda::filtlong=0.2.1" : null)
    if (workflow.containerEngine == 'singularity' && !params.singularity_pull_docker_container) {
        container "https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0"
    } else {
        container "quay.io/biocontainers/filtlong:0.2.1--h9a82719_0"
    }

    input:
    tuple val(meta), path(shortreads), path(longreads)

    // The reads pattern globs after the sample id so that it still matches the file
    // written by the script when options.suffix is appended to the prefix
    output:
    tuple val(meta), path("${meta.id}*_lr_filtlong.fastq.gz"), emit: reads
    path "versions.yml"                                       , emit: versions

    script:
    def prefix      = options.suffix ? "${meta.id}${options.suffix}" : "${meta.id}"
    // An empty shortreads input means "no short-read reference": emit no -1/-2 flags
    // rather than interpolating null file names into the command line
    def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}"
    """
    filtlong \\
        $short_reads \\
        $options.args \\
        $longreads \\
        | gzip -n > ${prefix}_lr_filtlong.fastq.gz

    cat <<-END_VERSIONS > versions.yml
    ${getProcessName(task.process)}:
        ${getSoftwareName(task.process)}: \$( filtlong --version | sed -e "s/Filtlong v//g" )
    END_VERSIONS
    """
}
50 changes: 50 additions & 0 deletions modules/filtlong/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
name: filtlong
description: Filtlong filters long reads based on quality measures or short read data.
keywords:
- nanopore
- quality control
- QC
- filtering
- long reads
- short reads
tools:
- filtlong:
description: Filtlong is a tool for filtering long reads. It can take a set of long reads and produce a smaller, better subset. It uses both read length (longer is better) and read identity (higher is better) when choosing which reads pass the filter.
homepage: https://anaconda.org/bioconda/filtlong
documentation: None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

😢

tool_dev_url: https://github.com/rrwick/Filtlong
doi: ""
licence: ['GPL v3']

input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- shortreads:
type: file
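description: Short-read fastq file(s) used to guide filtering; one file for single-end, two for paired-end, or an empty list when no short reads are available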
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"
- longreads:
type: file
description: Long-read fastq file to be filtered
pattern: "*.{fq,fastq,fq.gz,fastq.gz}"

output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- reads:
type: file
description: Filtered (compressed) fastq file
pattern: "*.fastq.gz"

authors:
- "@d4straub"
4 changes: 4 additions & 0 deletions tests/config/pytest_modules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,10 @@ fgbio/sortbam:
- modules/fgbio/sortbam/**
- tests/modules/fgbio/sortbam/**

filtlong:
- modules/filtlong/**
- tests/modules/filtlong/**

flash:
- modules/flash/**
- tests/modules/flash/**
Expand Down
36 changes: 36 additions & 0 deletions tests/modules/filtlong/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl = 2

include { FILTLONG } from '../../../modules/filtlong/main.nf' addParams( options: [:] )

// Long reads only: the short-read slot of the input tuple is an empty list
workflow test_filtlong {

    def meta       = [ id:'test', single_end:false ] // meta map
    def long_reads = file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true)

    FILTLONG ( [ meta, [], [ long_reads ] ] )
}

// Long reads plus a single-end Illumina read file
workflow test_filtlong_illumina_se {

    def meta        = [ id:'test', single_end:true ] // meta map
    def short_reads = [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
    def long_reads  = [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]

    FILTLONG ( [ meta, short_reads, long_reads ] )
}

// Long reads plus a paired-end Illumina read pair
workflow test_filtlong_illumina_pe {

    def meta        = [ id:'test', single_end:false ] // meta map
    def short_reads = [
        file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
        file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
    ]
    def long_reads  = [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ]

    FILTLONG ( [ meta, short_reads, long_reads ] )
}
23 changes: 23 additions & 0 deletions tests/modules/filtlong/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
- name: filtlong test_filtlong
command: nextflow run tests/modules/filtlong -entry test_filtlong -c tests/config/nextflow.config
tags:
- filtlong
files:
- path: output/filtlong/test_lr_filtlong.fastq.gz
md5sum: 7029066c27ac6f5ef18d660d5741979a

- name: filtlong test_filtlong_illumina_se
command: nextflow run tests/modules/filtlong -entry test_filtlong_illumina_se -c tests/config/nextflow.config
tags:
- filtlong
files:
- path: output/filtlong/test_lr_filtlong.fastq.gz
md5sum: 7029066c27ac6f5ef18d660d5741979a

- name: filtlong test_filtlong_illumina_pe
command: nextflow run tests/modules/filtlong -entry test_filtlong_illumina_pe -c tests/config/nextflow.config
tags:
- filtlong
files:
- path: output/filtlong/test_lr_filtlong.fastq.gz
md5sum: 7029066c27ac6f5ef18d660d5741979a