Skip to content

Commit

Permalink
Change the workflow to take MAGeCK MLE as a default and add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurenceKuhl committed Dec 12, 2023
1 parent 787497e commit 8739b94
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@ jobs:
ANALYSIS:
- "test_screening"
- "test_screening_paired"
- "test_screening_rra"
- "test_targeted"
- "test_umis"

steps:
- name: Check out pipeline code
uses: actions/checkout@v3
Expand Down
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,14 @@ process {
]
}

// Publishing rules for the MATRICESCREATION process:
// its outputs are copied to <outdir>/design_matrix using the pipeline-wide publish mode.
withName: MATRICESCREATION {
publishDir = [
path: { "${params.outdir}/design_matrix" },
mode: params.publish_dir_mode,
// Returning null from saveAs skips publishing for that file, so 'versions.yml' is never published.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: MINIMAP2_ALIGN_UMI_1 {
ext.args = '-x map-ont'
ext.prefix = { "${reads.baseName}_cycle1" }
Expand Down
3 changes: 1 addition & 2 deletions conf/test_screening.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test.csv'
analysis = 'screening'
crisprcleanr = "Brunello_Library"
mle_design_matrix = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/design_matrix.txt"
library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt"
rra_contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
}
29 changes: 29 additions & 0 deletions conf/test_screening_rra.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
nextflow run nf-core/crisprseq -profile test_screening_rra,<conda/docker/singularity> --outdir <OUTDIR>
----------------------------------------------------------------------------------------
*/

// Parameters for the `test_screening_rra` profile: a screening run with the `rra` flag enabled.
params {
config_profile_name = 'Test screening profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test.csv'
analysis = 'screening'
crisprcleanr = "Brunello_Library"
library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt"
// Table of reference/treatment comparisons
contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
// Enable the RRA branch of the screening workflow (the pipeline default is `rra = false`)
rra = true
}
53 changes: 53 additions & 0 deletions modules/local/matricescreation.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
 * MATRICESCREATION
 *
 * Builds one tab-separated design matrix per row of a contrasts table.
 *
 * Input:
 *   contrasts - semicolon-separated table with a header and the columns
 *               `reference` and `treatment`; each cell holds one or more
 *               comma-separated sample names.
 * Output:
 *   design_matrix - one `<treatment>_vs_<reference>.txt` file per contrast
 *                   (commas in the names are replaced by underscores), with the
 *                   columns `Samples`, `baseline` (always 1) and
 *                   `<treatment>_vs_<reference>` (1 for treatment samples, 0 otherwise).
 */
process MATRICESCREATION {
    label 'process_single'

    conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2'
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' :
        'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }"

    input:
    path(contrasts)

    output:
    path("*.txt"), emit: design_matrix
    // path "versions.yml" , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:

    """
    #!/usr/bin/env Rscript
    #### author: Laurence Kuhlburger
    #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text.
    ####
    #### Create one design matrix per contrast (row) of the contrasts table.
    data <- read.table("$contrasts", header = TRUE, sep = ";", stringsAsFactors = FALSE)
    print(data)

    # seq_len() yields an empty loop for an empty table, whereas 1:0 would iterate twice
    for (i in seq_len(nrow(data))) {
        # Sample names of the current contrast (comma-separated inside each cell)
        control_samples   <- trimws(unlist(strsplit(as.character(data\$reference[i]), ",")))
        treatment_samples <- trimws(unlist(strsplit(as.character(data\$treatment[i]), ",")))
        all_samples       <- unique(c(control_samples, treatment_samples))

        # One name, computed once from row i, is used for both the contrast column
        # and the output file so that they can never disagree.
        comparison <- paste0(
            gsub(",", "_", data\$treatment[i]),
            "_vs_",
            gsub(",", "_", data\$reference[i])
        )

        # check.names = FALSE keeps column names verbatim (no make.names() mangling)
        design_matrix <- data.frame(
            Samples          = all_samples,
            baseline         = rep(1, length(all_samples)),
            stringsAsFactors = FALSE,
            check.names      = FALSE
        )
        # 1 for treatment samples, 0 for reference-only samples
        design_matrix[[comparison]] <- as.integer(all_samples %in% treatment_samples)

        write.table(design_matrix, paste0(comparison, ".txt"), sep = "\t", quote = FALSE, row.names = FALSE)
    }
    """
}
4 changes: 3 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@ params {
protospacer = null
library = null
crisprcleanr = null
rra_contrasts = null
contrasts = null
mle_design_matrix = null
count_table = null
min_reads = 30
min_targeted_genes = 3
rra = false
bagel_reference_essentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/CEGv2.txt'
bagel_reference_nonessentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/NEGv1.txt'

Expand Down Expand Up @@ -199,6 +200,7 @@ profiles {
test_screening_full { includeConfig 'conf/test_screening_full.config' }
test_screening { includeConfig 'conf/test_screening.config' }
test_screening_paired { includeConfig 'conf/test_screening_paired.config' }
test_screening_rra { includeConfig 'conf/test_screening_rra.config' }
}

// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
Expand Down
6 changes: 5 additions & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,16 @@
"exists": true,
"description": "Design matrix used for MAGeCK MLE to call essential genes under multiple conditions while considering sgRNA knockout efficiency"
},
"rra_contrasts": {
"contrasts": {
"type": "string",
"format": "file-path",
"exists": true,
"description": "Semicolon-separated file with the conditions to be compared (multiple samples within one condition are separated by commas). The first one will be the reference (control)"
},
"rra": {
"type": "boolean",
"description": "Parameter in case MAGeCK RRA should be run instead of MAGeCK MLE."
},
"count_table": {
"type": "string",
"format": "file-path",
Expand Down
27 changes: 21 additions & 6 deletions workflows/crisprseq_screening.nf
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ if(params.mle_design_matrix) {
.set { ch_design }
}

// Parameter sanity checks for the RRA / MLE switch.

// A design matrix is irrelevant to RRA itself; tell the user it only feeds MAGeCK MLE.
// `log.warn` is used because `warning` is not a Nextflow built-in and would fail when called.
if (params.rra && params.mle_design_matrix) {
    log.warn "mle_design_matrix will only be used for the MAGeCK MLE computations"
}

// MAGeCK RRA needs the contrasts table to know which samples to compare; abort early without it.
if (params.rra && !params.contrasts) {
    error "Please also provide the contrasts table to compare the samples for MAGeCK RRA"
}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
CONFIG FILES
Expand Down Expand Up @@ -66,6 +74,7 @@ include { BAGEL2_FC } from '../modules/local/bagel2/fc'
include { BAGEL2_BF } from '../modules/local/bagel2/bf'
include { BAGEL2_PR } from '../modules/local/bagel2/pr'
include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph'
include { MATRICESCREATION } from '../modules/local/matricescreation'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -157,8 +166,8 @@ workflow CRISPRSEQ_SCREENING {
}.set { ch_counts }
}

if(params.rra_contrasts) {
Channel.fromPath(params.rra_contrasts)
if(params.rra) {
Channel.fromPath(params.contrasts)
.splitCsv(header:true, sep:';' )
.set { ch_contrasts }
counts = ch_contrasts.combine(ch_counts)
Expand All @@ -175,8 +184,8 @@ workflow CRISPRSEQ_SCREENING {
ch_versions = ch_versions.mix(MAGECK_GRAPHRRA.out.versions)
}

if(params.rra_contrasts) {
Channel.fromPath(params.rra_contrasts)
if(params.contrasts && params.rra) {
Channel.fromPath(params.contrasts)
.splitCsv(header:true, sep:';' )
.set { ch_bagel }
counts = ch_bagel.combine(ch_counts)
Expand Down Expand Up @@ -216,8 +225,14 @@ workflow CRISPRSEQ_SCREENING {

}

if(params.mle_design_matrix) {
ch_mle = ch_counts.combine(ch_design)
// Run MAGeCK MLE when a design matrix is supplied, or when contrasts are given and RRA was not requested.
// (`params.rra`, not the undefined bare variable `rra`.)
if((params.mle_design_matrix) || (params.contrasts && !params.rra)) {
    if(params.mle_design_matrix) {
        // An explicitly supplied design matrix takes precedence over one derived from the contrasts.
        ch_mle = ch_counts.combine(ch_design)
    } else {
        // No design matrix given: derive one per contrast from the contrasts table.
        // file() turns the parameter (local path or URL) into a file object for the `path` input.
        MATRICESCREATION(file(params.contrasts, checkIfExists: true))
        ch_mle = ch_counts.combine(MATRICESCREATION.out.design_matrix)
    }
ch_mle.map {
it -> [[id: it[1].getBaseName()], it[0], it[1]]
}.set { ch_designed_mle }
Expand Down

0 comments on commit 8739b94

Please sign in to comment.