Skip to content

Commit

Permalink
Merge pull request #1231 from maxulysse/sortmerna
Browse files Browse the repository at this point in the history
Update sortmerna usage
  • Loading branch information
maxulysse authored Mar 5, 2024
2 parents a50212c + 04b3581 commit 34e51dd
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 5 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [PR #1220](https://github.com/nf-core/rnaseq/pull/1220) - Initialise nf-test and add pipeline level test
- [PR #1226](https://github.com/nf-core/rnaseq/pull/1226) - Reuse bbsplit index and don't keep overwriting ([#1225](https://github.com/nf-core/rnaseq/issues/1225))
- [PR #1229](https://github.com/nf-core/rnaseq/pull/1229) - Template update for nf-core/tools v2.13.1
- [PR #1231](https://github.com/nf-core/rnaseq/pull/1231) - Add option to build, save and reuse a SortMeRNA index (`--sortmerna_index`)

### Parameters

| Old parameter | New parameter |
| ------------- | ------------------- |
| | `--sortmerna_index` |

### Software dependencies

| Dependency | Old version | New version |
Expand Down
8 changes: 7 additions & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ params.gtf = getGenomeAttribute('gtf')
params.gff = getGenomeAttribute('gff')
params.gene_bed = getGenomeAttribute('bed12')
params.bbsplit_index = getGenomeAttribute('bbsplit')
params.sortmerna_index = getGenomeAttribute('sortmerna')
params.star_index = getGenomeAttribute('star')
params.hisat2_index = getGenomeAttribute('hisat2')
params.rsem_index = getGenomeAttribute('rsem')
Expand Down Expand Up @@ -70,18 +71,21 @@ workflow NFCORE_RNASEQ {
params.gene_bed,
params.splicesites,
params.bbsplit_fasta_list,
params.ribo_database_manifest,
params.star_index,
params.rsem_index,
params.salmon_index,
params.kallisto_index,
params.hisat2_index,
params.bbsplit_index,
params.sortmerna_index,
params.gencode,
params.featurecounts_group_type,
params.aligner,
params.pseudo_aligner,
params.skip_gtf_filter,
params.skip_bbsplit,
!params.remove_ribo_rna,
params.skip_alignment,
params.skip_pseudo_alignment
)
Expand Down Expand Up @@ -114,7 +118,9 @@ workflow NFCORE_RNASEQ {
PREPARE_GENOME.out.salmon_index,
PREPARE_GENOME.out.kallisto_index,
PREPARE_GENOME.out.bbsplit_index,
PREPARE_GENOME.out.splicesites
PREPARE_GENOME.out.sortmerna_index,
PREPARE_GENOME.out.splicesites,
// make_sortmerna_index: build the index inside RNASEQ only when rRNA removal is enabled
// and no pre-built index was supplied (the previous `!x && x` expression was always false)
!params.sortmerna_index && params.remove_ribo_rna
)
ch_versions = ch_versions.mix(RNASEQ.out.versions)

Expand Down
2 changes: 1 addition & 1 deletion modules/nf-core/sortmerna/nextflow.config

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,14 @@
"description": "Path to directory or tar.gz archive for pre-built BBSplit index.",
"help_text": "The BBSplit index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--bbsplit_index` for future runs."
},
"sortmerna_index": {
"type": "string",
"format": "path",
"exists": true,
"fa_icon": "fas fa-bezier-curve",
"description": "Path to directory or tar.gz archive for pre-built SortMeRNA index.",
"help_text": "The SortMeRNA index will have to be built at least once with this pipeline (see `--save_reference` to save index). It can then be provided via `--sortmerna_index` for future runs."
},
"remove_ribo_rna": {
"type": "boolean",
"fa_icon": "fas fa-trash-alt",
Expand Down
35 changes: 35 additions & 0 deletions subworkflows/local/prepare_genome/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ include { GUNZIP as GUNZIP_TRANSCRIPT_FASTA } from '../../../modules/nf-core/gun
include { GUNZIP as GUNZIP_ADDITIONAL_FASTA } from '../../../modules/nf-core/gunzip'

include { UNTAR as UNTAR_BBSPLIT_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_SORTMERNA_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_STAR_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_RSEM_INDEX } from '../../../modules/nf-core/untar'
include { UNTAR as UNTAR_HISAT2_INDEX } from '../../../modules/nf-core/untar'
Expand All @@ -20,6 +21,7 @@ include { CUSTOM_CATADDITIONALFASTA } from '../../../modules/nf-core/cus
include { CUSTOM_GETCHROMSIZES } from '../../../modules/nf-core/custom/getchromsizes'
include { GFFREAD } from '../../../modules/nf-core/gffread'
include { BBMAP_BBSPLIT } from '../../../modules/nf-core/bbmap/bbsplit'
include { SORTMERNA as SORTMERNA_INDEX } from '../../../modules/nf-core/sortmerna'
include { STAR_GENOMEGENERATE } from '../../../modules/nf-core/star/genomegenerate'
include { HISAT2_EXTRACTSPLICESITES } from '../../../modules/nf-core/hisat2/extractsplicesites'
include { HISAT2_BUILD } from '../../../modules/nf-core/hisat2/build'
Expand All @@ -43,18 +45,21 @@ workflow PREPARE_GENOME {
gene_bed // file: /path/to/gene.bed
splicesites // file: /path/to/splicesites.txt
bbsplit_fasta_list // file: /path/to/bbsplit_fasta_list.txt
sortmerna_fasta_list // file: /path/to/sortmerna_fasta_list.txt
star_index // directory: /path/to/star/index/
rsem_index // directory: /path/to/rsem/index/
salmon_index // directory: /path/to/salmon/index/
kallisto_index // directory: /path/to/kallisto/index/
hisat2_index // directory: /path/to/hisat2/index/
bbsplit_index // directory: /path/to/bbsplit/index/
sortmerna_index // directory: /path/to/sortmerna/index/
gencode // boolean: whether the genome is from GENCODE
featurecounts_group_type // string: The attribute type used to group feature types in the GTF file when generating the biotype plot with featureCounts
aligner // string: Specifies the alignment algorithm to use - available options are 'star_salmon', 'star_rsem' and 'hisat2'
pseudo_aligner // string: Specifies the pseudo aligner to use - available options are 'salmon'. Runs in addition to '--aligner'
skip_gtf_filter // boolean: Skip filtering of GTF for valid scaffolds and/ or transcript IDs
skip_bbsplit // boolean: Skip BBSplit for removal of non-reference genome reads
skip_sortmerna // boolean: Skip sortmerna for removal of reads mapping to sequences in sortmerna_fasta_list
skip_alignment // boolean: Skip all of the alignment-based processes within the pipeline
skip_pseudo_alignment // boolean: Skip all of the pseudoalignment-based processes within the pipeline

Expand Down Expand Up @@ -188,6 +193,7 @@ workflow PREPARE_GENOME {
//
// Names of the tools whose index must be unpacked, reused or built further down;
// a tool is listed only when its step has not been skipped.
def prepare_tool_indices = []
if (!skip_bbsplit) {
    prepare_tool_indices.add('bbsplit')
}
if (!skip_sortmerna) {
    prepare_tool_indices.add('sortmerna')
}
if (!skip_alignment) {
    prepare_tool_indices.add(aligner)
}
if (!skip_pseudo_alignment && pseudo_aligner) {
    prepare_tool_indices.add(pseudo_aligner)
}

Expand Down Expand Up @@ -218,6 +224,34 @@ workflow PREPARE_GENOME {
}
}

//
// Resolve the SortMeRNA index: build it when none was supplied,
// otherwise unpack the supplied archive or reuse the supplied directory
//
ch_sortmerna_index = Channel.empty()
if ('sortmerna' in prepare_tool_indices) {
    if (!sortmerna_index) {
        // No pre-built index: each line of the manifest is a reference FASTA path;
        // all of them are gathered into a single [ 'rrna_refs', [ fastas ] ] item
        ch_sortmerna_fastas = Channel.from(file(sortmerna_fasta_list).readLines())
            .map { fasta_path -> file(fasta_path, checkIfExists: true) }
            .collect()
            .map { fastas -> ['rrna_refs', fastas] }

        // First and third inputs are empty placeholder tuples; only the FASTAs are supplied
        SORTMERNA_INDEX (
            Channel.of([[],[]]),
            ch_sortmerna_fastas,
            Channel.of([[],[]])
        )
        ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
        ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions)
    } else if (sortmerna_index.endsWith('.tar.gz')) {
        // Pre-built index shipped as an archive: unpack it and keep the second element of the output
        UNTAR_SORTMERNA_INDEX ( [ [:], sortmerna_index ] )
        ch_sortmerna_index = UNTAR_SORTMERNA_INDEX.out.untar.map { unpacked -> unpacked[1] }
        ch_versions = ch_versions.mix(UNTAR_SORTMERNA_INDEX.out.versions)
    } else {
        // Pre-built index given as a plain path: use it as-is
        ch_sortmerna_index = Channel.value(file(sortmerna_index))
    }
}

//
// Uncompress STAR index or generate from scratch if required
//
Expand Down Expand Up @@ -336,6 +370,7 @@ workflow PREPARE_GENOME {
chrom_sizes = ch_chrom_sizes // channel: path(genome.sizes)
splicesites = ch_splicesites // channel: path(genome.splicesites.txt)
bbsplit_index = ch_bbsplit_index // channel: path(bbsplit/index/)
sortmerna_index = ch_sortmerna_index // channel: path(sortmerna/index/)
star_index = ch_star_index // channel: path(star/index/)
rsem_index = ch_rsem_index // channel: path(rsem/index/)
hisat2_index = ch_hisat2_index // channel: path(hisat2/index/)
Expand Down
13 changes: 13 additions & 0 deletions subworkflows/local/prepare_genome/nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,16 @@ if (!params.skip_bbsplit && params.bbsplit_fasta_list) {
}
}
}

// Settings for the SORTMERNA_INDEX process; only applied when rRNA removal is enabled
// and a rRNA database manifest has been set.
if (params.remove_ribo_rna && params.ribo_database_manifest) {
process {
withName: 'SORTMERNA_INDEX' {
// NOTE(review): '--index 1' presumably makes SortMeRNA build the index only - confirm against the SortMeRNA docs
ext.args = '--index 1'
// Publish under <outdir>/genome/sortmerna when --save_reference is set.
// saveAs returns null for versions.yml and for every file when --save_reference is not set.
publishDir = [
path: { params.save_reference ? "${params.outdir}/genome/sortmerna" : params.outdir },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : params.save_reference ? filename : null }
]
}
}
}
24 changes: 21 additions & 3 deletions workflows/rnaseq/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ include { SAMTOOLS_SORT } from '../../mod
include { PRESEQ_LCEXTRAP } from '../../modules/nf-core/preseq/lcextrap'
include { QUALIMAP_RNASEQ } from '../../modules/nf-core/qualimap/rnaseq'
include { SORTMERNA } from '../../modules/nf-core/sortmerna'
include { SORTMERNA as SORTMERNA_INDEX } from '../../modules/nf-core/sortmerna'
include { STRINGTIE_STRINGTIE } from '../../modules/nf-core/stringtie/stringtie'
include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts'
include { MULTIQC } from '../../modules/nf-core/multiqc'
Expand Down Expand Up @@ -97,7 +98,9 @@ workflow RNASEQ {
ch_salmon_index // channel: path(salmon/index/)
ch_kallisto_index // channel: [ meta, path(kallisto/index/) ]
ch_bbsplit_index // channel: path(bbsplit/index/)
ch_sortmerna_index // channel: path(sortmerna/index/)
ch_splicesites // channel: path(genome.splicesites.txt)
make_sortmerna_index // boolean: Whether to create a sortmerna index before running sortmerna

main:

Expand Down Expand Up @@ -225,14 +228,29 @@ workflow RNASEQ {
//
// MODULE: Remove ribosomal RNA reads
//
// Check rRNA databases for sortmerna
if (params.remove_ribo_rna) {
ch_ribo_db = file(params.ribo_database_manifest)
ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines()).map { row -> file(row, checkIfExists: true) }.collect()
if (ch_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${ch_ribo_db.getName()}!"}

ch_sortmerna_fastas = Channel.from(ch_ribo_db.readLines())
.map { row -> file(row, checkIfExists: true) }
.collect()
.map{ ['rrna_refs', it] }

if (make_sortmerna_index) {
SORTMERNA_INDEX (
[[],[]],
ch_sortmerna_fastas,
[[],[]]
)
ch_sortmerna_index = SORTMERNA_INDEX.out.index.first()
}

SORTMERNA (
ch_filtered_reads,
ch_sortmerna_fastas.map{ it -> [ [ id:'fastas' ], it ] },
[[:],[]]
ch_sortmerna_fastas,
ch_sortmerna_index
)
.reads
.set { ch_filtered_reads }
Expand Down

0 comments on commit 34e51dd

Please sign in to comment.