diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 133e6739..9a7737fc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -29,8 +29,10 @@ jobs:
         ANALYSIS:
           - "test_screening"
           - "test_screening_paired"
+          - "test_screening_rra"
           - "test_targeted"
           - "test_umis"
+
     steps:
       - name: Check out pipeline code
         uses: actions/checkout@v3
diff --git a/conf/modules.config b/conf/modules.config
index afae335d..f1c930e4 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -173,6 +173,14 @@ process {
         ]
     }
 
+    withName: MATRICESCREATION {
+        publishDir = [
+            path: { "${params.outdir}/design_matrix" },
+            mode: params.publish_dir_mode,
+            saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+        ]
+    }
+
     withName: MINIMAP2_ALIGN_UMI_1 {
         ext.args = '-x map-ont'
         ext.prefix = { "${reads.baseName}_cycle1" }
diff --git a/conf/test_screening.config b/conf/test_screening.config
index 7661aacf..0b5c5b70 100644
--- a/conf/test_screening.config
+++ b/conf/test_screening.config
@@ -23,7 +23,6 @@ params {
     input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test.csv'
     analysis = 'screening'
     crisprcleanr = "Brunello_Library"
-    mle_design_matrix = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/design_matrix.txt"
     library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt"
-    rra_contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
+    contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
 }
diff --git a/conf/test_screening_rra.config b/conf/test_screening_rra.config
new file mode 100644
index 00000000..5042ec35
--- /dev/null
+++ b/conf/test_screening_rra.config
@@ -0,0 +1,29 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/crisprseq -profile test_screening_rra, --outdir
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name = 'Test screening RRA profile'
+    config_profile_description = 'Minimal test dataset to check the MAGeCK RRA branch of the screening analysis'
+
+    // Limit resources so that this can run on GitHub Actions
+    max_cpus = 2
+    max_memory = '6.GB'
+    max_time = '6.h'
+
+    // Input data
+    input = 'https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/samplesheet_test.csv'
+    analysis = 'screening'
+    crisprcleanr = "Brunello_Library"
+    library = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/brunello_target_sequence.txt"
+    contrasts = "https://raw.githubusercontent.com/nf-core/test-datasets/crisprseq/testdata/rra_contrasts.txt"
+    rra = true
+}
diff --git a/modules/local/matricescreation.nf b/modules/local/matricescreation.nf
new file mode 100644
index 00000000..cb1005ef
--- /dev/null
+++ b/modules/local/matricescreation.nf
@@ -0,0 +1,58 @@
+process MATRICESCREATION {
+    label 'process_single'
+
+    conda 'r-ggplot2=3.4.3 bioconductor-shortread=1.58.0 r-ggpubr=0.6.0 r-ggmsa=1.0.2 r-seqmagick=0.1.6 r-tidyr=1.3.0 r-ggseqlogo=0.1 r-cowplot=1.1.1 r-seqinr=4.2_30 r-optparse=1.7.3 r-dplyr=1.1.2 r-plyr=1.8.8 r-stringr=1.5.0 r-plotly=4.10.2'
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' :
+        'biocontainers/mulled-v2-6de07928379e6eface08a0019c4a1d6b5192e805:0d77388f37ddd923a087f7792e30e83ab54c918c-0' }"
+
+    input:
+    path(contrasts)
+
+    output:
+    path("*.txt"), emit: design_matrix
+    // path "versions.yml" , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+
+    """
+    #!/usr/bin/env Rscript
+    #### author: Laurence Kuhlburger
+    #### Released under the MIT license. See git repository (https://github.com/nf-core/crisprseq) for full license text.
+    ####
+    #### Write one design matrix (Samples, baseline, contrast column) per row of the contrasts table.
+
+    contrasts_table <- read.table("$contrasts", header = TRUE, sep = ";", stringsAsFactors = FALSE)
+    print(contrasts_table)
+
+    # seq_len() yields an empty sequence for a header-only table, where 1:nrow() would give c(1, 0)
+    for (i in seq_len(nrow(contrasts_table))) {
+        # Sample names are comma-separated inside each field
+        control_samples <- unlist(strsplit(contrasts_table\$reference[i], ","))
+        treatment_samples <- unlist(strsplit(contrasts_table\$treatment[i], ","))
+        all_samples <- unique(c(control_samples, treatment_samples))
+
+        # Comma-free label built from row i; reused for the column name and the file name
+        contrast_name <- paste0(
+            gsub(",", "_", contrasts_table\$treatment[i]),
+            "_vs_",
+            gsub(",", "_", contrasts_table\$reference[i])
+        )
+
+        # baseline is 1 for every sample; the contrast column is 1 for treatment samples only
+        design_matrix <- data.frame(
+            Samples = all_samples,
+            baseline = rep(1, length(all_samples)),
+            contrast = as.integer(all_samples %in% treatment_samples),
+            stringsAsFactors = FALSE
+        )
+        # Renamed after construction so data.frame() cannot rewrite the label through make.names()
+        colnames(design_matrix)[3] <- contrast_name
+
+        write.table(design_matrix, paste0(contrast_name, ".txt"), sep = "\t", quote = FALSE, row.names = FALSE)
+    }
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index 5f6dc691..0f86bcbb 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -16,11 +16,12 @@ params {
     protospacer = null
     library = null
     crisprcleanr = null
-    rra_contrasts = null
+    contrasts = null
     mle_design_matrix = null
     count_table = null
     min_reads = 30
     min_targeted_genes = 3
+    rra = false
     bagel_reference_essentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/CEGv2.txt'
     bagel_reference_nonessentials = 'https://raw.githubusercontent.com/hart-lab/bagel/master/NEGv1.txt'
 
@@ -199,6 +200,7 @@ profiles {
     test_screening_full { includeConfig 'conf/test_screening_full.config' }
     test_screening { includeConfig 'conf/test_screening.config' }
     test_screening_paired { includeConfig 'conf/test_screening_paired.config' }
+    test_screening_rra { includeConfig 'conf/test_screening_rra.config' }
 }
 
 // Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
diff --git a/nextflow_schema.json b/nextflow_schema.json
index ddc1856e..58a4d67c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -156,12 +156,16 @@
                     "exists": true,
                     "description": "Design matrix used for MAGeCK MLE to call essential genes under multiple conditions while considering sgRNA knockout efficiency"
                 },
-                "rra_contrasts": {
+                "contrasts": {
                     "type": "string",
                     "format": "file-path",
                     "exists": true,
                     "description": "Comma-separated file with the conditions to be compared. The first one will be the reference (control)"
                 },
+                "rra": {
+                    "type": "boolean",
+                    "description": "Parameter in case MAGeCK RRA should be run instead of MAGeCK MLE."
+                },
                 "count_table": {
                     "type": "string",
                     "format": "file-path",
diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf
index 83cd51b9..32b02298 100644
--- a/workflows/crisprseq_screening.nf
+++ b/workflows/crisprseq_screening.nf
@@ -24,6 +24,14 @@ if(params.mle_design_matrix) {
         .set { ch_design }
 }
 
+if(params.rra && params.mle_design_matrix) {
+    log.warn "mle_design_matrix will only be used for the MAGeCK MLE computations"
+}
+
+if(params.rra && !params.contrasts) {
+    error "Please also provide the contrasts table to compare the samples for MAGeCK RRA"
+}
+
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     CONFIG FILES
@@ -66,6 +74,7 @@ include { BAGEL2_FC } from '../modules/local/bagel2/fc'
 include { BAGEL2_BF } from '../modules/local/bagel2/bf'
 include { BAGEL2_PR } from '../modules/local/bagel2/pr'
 include { BAGEL2_GRAPH } from '../modules/local/bagel2/graph'
+include { MATRICESCREATION } from '../modules/local/matricescreation'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -157,8 +166,8 @@ workflow CRISPRSEQ_SCREENING {
         }.set { ch_counts }
     }
 
-    if(params.rra_contrasts) {
-        Channel.fromPath(params.rra_contrasts)
+    if(params.rra) {
+        Channel.fromPath(params.contrasts)
             .splitCsv(header:true, sep:';' )
             .set { ch_contrasts }
         counts = ch_contrasts.combine(ch_counts)
@@ -175,8 +184,8 @@ workflow CRISPRSEQ_SCREENING {
         ch_versions = ch_versions.mix(MAGECK_GRAPHRRA.out.versions)
     }
 
-    if(params.rra_contrasts) {
-        Channel.fromPath(params.rra_contrasts)
+    if(params.contrasts && params.rra) {
+        Channel.fromPath(params.contrasts)
             .splitCsv(header:true, sep:';' )
             .set { ch_bagel }
         counts = ch_bagel.combine(ch_counts)
@@ -216,8 +225,19 @@ workflow CRISPRSEQ_SCREENING {
 
     }
 
-    if(params.mle_design_matrix) {
-        ch_mle = ch_counts.combine(ch_design)
+    if((params.mle_design_matrix) || (params.contrasts && !params.rra)) {
+        // Collect user-supplied and contrast-derived design matrices so neither overrides the other
+        ch_mle_designs = Channel.empty()
+        if(params.mle_design_matrix) {
+            ch_mle_designs = ch_mle_designs.mix(ch_design)
+        }
+        if(params.contrasts && !params.rra) {
+            // Stage the contrasts table through a channel, as done for the RRA and BAGEL2 branches
+            MATRICESCREATION(Channel.fromPath(params.contrasts))
+            // The process can write several "*.txt" files; flatten() emits them one by one
+            ch_mle_designs = ch_mle_designs.mix(MATRICESCREATION.out.design_matrix.flatten())
+        }
+        ch_mle = ch_counts.combine(ch_mle_designs)
         ch_mle.map {
             it -> [[id: it[1].getBaseName()], it[0], it[1]]
         }.set { ch_designed_mle }