Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve strandedness derivation in rnaseq preprocessing swf #5982

Merged
merged 2 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 63 additions & 12 deletions subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,62 @@ include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../fastq_fastqc_umitools_fast

def pass_trimmed_reads = [:]

public static String getSalmonInferredStrandedness(json_file) {
def lib_type = new JsonSlurper().parseText(json_file.text).get('library_types')[0]
def strandedness = 'reverse'
if (lib_type) {
if (lib_type in ['U', 'IU']) {
strandedness = 'unstranded'
} else if (lib_type in ['SF', 'ISF']) {
//
// Function to determine library type by comparing type counts.
//

//
def calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold=0.8, unstranded_threshold=0.1) {
def totalFragments = forwardFragments + reverseFragments + unstrandedFragments
def totalStrandedFragments = forwardFragments + reverseFragments

def library_strandedness = 'undetermined'
if (totalStrandedFragments > 0) {
def forwardProportion = forwardFragments / (totalStrandedFragments as double)
def reverseProportion = reverseFragments / (totalStrandedFragments as double)
def proportionDifference = Math.abs(forwardProportion - reverseProportion)

if (forwardProportion >= stranded_threshold) {
strandedness = 'forward'
} else if (lib_type in ['SR', 'ISR']) {
} else if (reverseProportion >= stranded_threshold) {
strandedness = 'reverse'
} else if (proportionDifference <= unstranded_threshold) {
strandedness = 'unstranded'
}
}
return strandedness

return [
inferred_strandedness: strandedness,
forwardFragments: (forwardFragments / (totalFragments as double)) * 100,
reverseFragments: (reverseFragments / (totalFragments as double)) * 100,
unstrandedFragments: (unstrandedFragments / (totalFragments as double)) * 100
]
}

//
// Function that parses Salmon quant 'lib_format_counts.json' output file to get inferred strandedness
//
def getSalmonInferredStrandedness(json_file, stranded_threshold = 0.8, unstranded_threshold = 0.1) {
// Parse the JSON content of the file
def libCounts = new JsonSlurper().parseText(json_file.text)

// Calculate the counts for forward and reverse strand fragments
def forwardKeys = ['SF', 'ISF', 'MSF', 'OSF']
def reverseKeys = ['SR', 'ISR', 'MSR', 'OSR']

// Calculate unstranded fragments (IU and U)
// NOTE: this is here for completeness, but actually all fragments have a
// strandedness (even if the overall library does not), so all these values
// will be '0'. See
// https://groups.google.com/g/sailfish-users/c/yxzBDv6NB6I
def unstrandedKeys = ['IU', 'U', 'MU']

def forwardFragments = forwardKeys.collect { libCounts[it] ?: 0 }.sum()
def reverseFragments = reverseKeys.collect { libCounts[it] ?: 0 }.sum()
def unstrandedFragments = unstrandedKeys.collect { libCounts[it] ?: 0 }.sum()

// Use shared calculation function to determine strandedness
return calculateStrandedness(forwardFragments, reverseFragments, unstrandedFragments, stranded_threshold, unstranded_threshold)
}

//
Expand Down Expand Up @@ -61,6 +104,8 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {
remove_ribo_rna // boolean: true/false: whether to run sortmerna to remove rrnas
with_umi // boolean: true/false: Enable UMI-based read deduplication.
umi_discard_read // integer: 0, 1 or 2
stranded_threshold // float: The fraction of stranded reads that must be assigned to a strandedness for confident assignment. Must be at least 0.5
unstranded_threshold // float: The difference in fraction of stranded reads assigned to 'forward' and 'reverse' below which a sample is classified as 'unstranded'

main:

Expand Down Expand Up @@ -251,10 +296,16 @@ workflow FASTQ_QC_TRIM_FILTER_SETSTRANDEDNESS {

FASTQ_SUBSAMPLE_FQ_SALMON
.out
.json_info
.lib_format_counts
.join(ch_strand_fastq.auto_strand)
.map { meta, json, reads ->
return [ meta + [ strandedness: getSalmonInferredStrandedness(json) ], reads ]
.map {
meta, json, reads ->
def salmon_strand_analysis = getSalmonInferredStrandedness(json, stranded_threshold=stranded_threshold, unstranded_threshold=unstranded_threshold)
strandedness = salmon_strand_analysis.inferred_strandedness
if (strandedness == 'undetermined') {
strandedness = 'unstranded'
}
return [ meta + [ strandedness: strandedness, salmon_strand_analysis: salmon_strand_analysis ], reads ]
}
.mix(ch_strand_fastq.known_strand)
.set { ch_strand_inferred_fastq }
Expand Down
12 changes: 12 additions & 0 deletions subworkflows/nf-core/fastq_qc_trim_filter_setstrandedness/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,18 @@ input:
description: |
After UMI barcode extraction discard either R1 or R2 by setting this
parameter to 1 or 2, respectively
- stranded_threshold:
type: float
min: 0.5
description: |
The fraction of stranded reads that must be assigned to a strandedness
for confident assignment. Must be at least 0.5.
- unstranded_threshold:
type: float
description: |
The difference in fraction of stranded reads assigned to 'forward' and
'reverse' below which a sample is classified as 'unstranded'.

output:
- reads:
type: file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ nextflow_workflow {
input[17] = true // remove_ribo_rna
input[18] = false // with_umi
input[19] = 0 // umi_discard_read
input[20] = 0.8 // stranded_threshold
input[21] = 0.1 // unstranded_threshold
"""
}
}
Expand Down Expand Up @@ -112,6 +114,8 @@ nextflow_workflow {
input[17] = true // remove_ribo_rna
input[18] = false // with_umi
input[19] = 0 // umi_discard_read
input[20] = 0.8 // stranded_threshold
input[21] = 0.1 // unstranded_threshold
"""
}
}
Expand Down
Loading