diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d2e2955..eac1d11b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Fix cutadapt 3' and 5' no such variable found bug ([#187](https://github.com/nf-core/crisprseq/pull/187)) +- Fix design matrix bug that introduced dots instead of a hyphen ([#190](https://github.com/nf-core/crisprseq/pull/190)) +- Make the outputs of FluteMLE optional, because some channels are left empty when some pathways fail ([#190](https://github.com/nf-core/crisprseq/pull/190)) +- Fix a typo in crisprcleanr/normalize that was hit when a user inputs a file ([#192](https://github.com/nf-core/crisprseq/pull/192)) ### Deprecated diff --git a/README.md b/README.md index 3600d458..afa1df22 100644 --- a/README.md +++ b/README.md @@ -127,14 +127,15 @@ Main developers: We thank the following people for their extensive assistance in the development of this pipeline: - [@alan-tracey](https://github.com/alan-tracey) +- [@bolenala](https://github.com/bolenala) - [@ggabernet](https://github.com/ggabernet) - [@jianhong](https://github.com/jianhong) +- [@joannakraw](https://github.com/joannakraw) - [@mashehu](https://github.com/mashehu) +- [@metinyazar](https://github.com/metinyazar) - [@msanvicente](https://github.com/msanvicente) - [@mschaffer-incyte](https://github.com/mschaffer-incyte) - [@SusiJo](https://github.com/SusiJo) -- [@joannakraw](https://github.com/joannakraw) -- [@metinyazar](https://github.com/metinyazar) ## Contributions and Support diff --git a/bin/BAGEL.py b/bin/BAGEL.py index ba757feb..d7add161 100755 --- a/bin/BAGEL.py +++ b/bin/BAGEL.py @@ -677,7 +677,6 @@ def calculate_bayes_factors( print("Iter TrainEss TrainNon TestSet") sys.stdout.flush() for loop in range(LOOPCOUNT): - # currentbf = {} printstr = "" printstr += str(loop) @@ -984,7 +983,6 @@ def calculate_bayes_factors( # for loop in range(LOOPCOUNT): - # currentnbf = {} printstr = "" printstr 
+= str(loop) diff --git a/bin/drugz.py b/bin/drugz.py index 3e822ae5..6a0a6c44 100755 --- a/bin/drugz.py +++ b/bin/drugz.py @@ -123,7 +123,6 @@ def calculate_fold_change( fc_replicate_id = "fc_{replicate}".format(replicate=replicate) fc_zscore_id = "zscore_" + fc_replicate_id empirical_bayes_id = "eb_std_{replicate}".format(replicate=replicate) - # one_based_idx = replicate + 1 # Get the control and treatment sample ids for each replicate control_sample = control_samples[replicate] diff --git a/docs/usage/screening.md b/docs/usage/screening.md index 5c0a4bbb..01acbc82 100644 --- a/docs/usage/screening.md +++ b/docs/usage/screening.md @@ -89,7 +89,8 @@ Running MAGeCK MLE and BAGEL2 with a contrast file will also output a Venn diagr ### Running MAGeCK RRA only -MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as previously stated in the last section (with a `.txt` extension) and also specify `--rra`. +MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as previously stated in the last section with --contrasts your_file.txt(with a `.txt` extension) and also specify `--rra`. +MAGeCK RRA performs robust ranking aggregation to identify genes that are consistently ranked highly across multiple replicate screens. To run MAGeCK RRA, you can define the contrasts as previously stated in the last section with `--contrasts your_file.txt` (with a `.txt` extension) and also specify `--rra`. ### Running MAGeCK MLE only @@ -109,6 +110,10 @@ This label is not mandatory as in case you are running time series. If you wish The downstream analysis involves distinguishing essential, non-essential, and target-associated genes. 
Additionally, it encompasses conducting biological functional category analysis and pathway enrichment analysis for these genes. Furthermore, it provides visualization of genes within pathways, enhancing user exploration of screening data. MAGECKFlute is run automatically after MAGeCK MLE and for each MLE design matrice. If you have used the `--day0_label`, MAGeCKFlute will be ran on all the other conditions. Please note that the DepMap data is used for these plots. +#### Using negative control sgRNAs for MAGeCK MLE + +You can add the parameter `--mle_control_sgrna` followed by your file (one non-targeting control sgRNA per line) to integrate the control sgRNAs into MAGeCK MLE. + ### Running BAGEL2 BAGEL2 (Bayesian Analysis of Gene Essentiality with Location) is a computational tool developed by the Hart Lab at Harvard University. It is designed for analyzing large-scale genetic screens, particularly CRISPR-Cas9 screens, to identify genes that are essential for the survival or growth of cells under different conditions. BAGEL2 integrates information about the location of guide RNAs within a gene and leverages this information to improve the accuracy of gene essentiality predictions. 
diff --git a/modules/local/mageck/flutemle.nf b/modules/local/mageck/flutemle.nf index b2b0c6cd..e620c446 100644 --- a/modules/local/mageck/flutemle.nf +++ b/modules/local/mageck/flutemle.nf @@ -12,11 +12,11 @@ process MAGECK_FLUTEMLE { tuple val(meta), path(gene_summary) output: - tuple val(meta), path("MAGeCKFlute_*/Enrichment/*") , emit: enrich - tuple val(meta), path("MAGeCKFlute_*/QC/*") , emit: qc - tuple val(meta), path("MAGeCKFlute_*/Selection/*") , emit: select - tuple val(meta), path("MAGeCKFlute_*/PathwayView/*"), emit: pathwayview - path "versions.yml" , emit: versions + tuple val(meta), path("MAGeCKFlute_*/Enrichment/*") , emit: enrich , optional: true + tuple val(meta), path("MAGeCKFlute_*/QC/*") , emit: qc , optional: true + tuple val(meta), path("MAGeCKFlute_*/Selection/*") , emit: select , optional: true + tuple val(meta), path("MAGeCKFlute_*/PathwayView/*"), emit: pathwayview, optional: true + path "versions.yml" , emit: versions , optional: true when: task.ext.when == null || task.ext.when diff --git a/modules/local/matricescreation.nf b/modules/local/matricescreation.nf index 86c00ee4..9470b5da 100644 --- a/modules/local/matricescreation.nf +++ b/modules/local/matricescreation.nf @@ -33,6 +33,9 @@ process MATRICESCREATION { dimnames = list(all_samples, c("Samples", "baseline", name)))) + # R automatically converts "-" to "." 
in the column names + # so here we re-assign the column names to keep the dashes defined by the user + colnames(design_matrix) <- c("Samples", "baseline", name) # Set baseline and treatment values in the design matrix design_matrix[, "Samples"] <- rownames(design_matrix) @@ -40,6 +43,7 @@ process MATRICESCREATION { design_matrix[treatment_samples, name] <- 1 design_matrix[treatment_samples, paste0(gsub(',', '_', '$meta.treatment'),"_vs_",gsub(",","_",'$meta.reference'))] <- 1 + # Print the design matrix to a file output_file <- paste0(gsub(',', '_', '$meta.treatment' ),"_vs_",gsub(",","_",'$meta.reference'),".txt") write.table(design_matrix, output_file, sep = "\t", quote = FALSE, row.names=FALSE) diff --git a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff index daa9446f..c3438e19 100644 --- a/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff +++ b/modules/nf-core/crisprcleanr/normalize/crisprcleanr-normalize.diff @@ -1,4 +1,7 @@ Changes in module 'nf-core/crisprcleanr/normalize' +'modules/nf-core/crisprcleanr/normalize/environment.yml' is unchanged +'modules/nf-core/crisprcleanr/normalize/meta.yml' is unchanged +Changes in 'crisprcleanr/normalize/main.nf': --- modules/nf-core/crisprcleanr/normalize/main.nf +++ modules/nf-core/crisprcleanr/normalize/main.nf @@ -8,12 +8,15 @@ @@ -18,7 +21,7 @@ Changes in module 'nf-core/crisprcleanr/normalize' path "versions.yml", emit: versions when: -@@ -26,20 +29,48 @@ +@@ -26,20 +29,49 @@ """ #!/usr/bin/env Rscript library(CRISPRcleanR) @@ -51,7 +54,8 @@ Changes in module 'nf-core/crisprcleanr/normalize' + rownames(library) = library[,1] + library = library[order(rownames(library)),] + library = library[,-1] -+ count_file_to_normalize <- count_file ++ names(count_file)[names(count_file) == 'Gene'] <- 'gene' ++ count_file_to_normalize <- count_file %>% dplyr::select(sgRNA, gene, everything()) + } + + normANDfcs <- 
ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) diff --git a/modules/nf-core/crisprcleanr/normalize/main.nf b/modules/nf-core/crisprcleanr/normalize/main.nf index d8969379..07f286f2 100644 --- a/modules/nf-core/crisprcleanr/normalize/main.nf +++ b/modules/nf-core/crisprcleanr/normalize/main.nf @@ -55,7 +55,8 @@ process CRISPRCLEANR_NORMALIZE { rownames(library) = library[,1] library = library[order(rownames(library)),] library = library[,-1] - count_file_to_normalize <- count_file + names(count_file)[names(count_file) == 'Gene'] <- 'gene' + count_file_to_normalize <- count_file %>% dplyr::select(sgRNA, gene, everything()) } normANDfcs <- ccr.NormfoldChanges(Dframe=count_file_to_normalize,saveToFig = FALSE,min_reads=${min_reads},EXPname="${prefix}", libraryAnnotation=library,display=FALSE) diff --git a/modules/nf-core/mageck/mle/mageck-mle.diff b/modules/nf-core/mageck/mle/mageck-mle.diff index 9cb169b0..06823614 100644 --- a/modules/nf-core/mageck/mle/mageck-mle.diff +++ b/modules/nf-core/mageck/mle/mageck-mle.diff @@ -1,28 +1,36 @@ Changes in module 'nf-core/mageck/mle' +'modules/nf-core/mageck/mle/environment.yml' is unchanged +'modules/nf-core/mageck/mle/meta.yml' is unchanged +Changes in 'mageck/mle/main.nf': --- modules/nf-core/mageck/mle/main.nf +++ modules/nf-core/mageck/mle/main.nf -@@ -8,8 +8,7 @@ +@@ -8,8 +8,8 @@ 'biocontainers/mageck:0.5.9.5--py39h1f90b4d_3' }" input: - tuple val(meta), path(count_table) - path(design_matrix) + tuple val(meta), path(design_matrix), path(count_table) ++ path(mle_control_sgrna) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary -@@ -21,7 +20,8 @@ +@@ -21,16 +21,21 @@ script: + def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" ++ def args2 = task.ext.args2 ?: '' + prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" + def design_command = design_matrix ? 
"-d $design_matrix" : '' ++ def control_sgrna = mle_control_sgrna ? "--control-sgrna $mle_control_sgrna" : '' """ mageck \\ -@@ -29,8 +29,9 @@ + mle \\ $args \\ ++ $control_sgrna \\ --threads $task.cpus \\ -k $count_table \\ - -d $design_matrix \\ @@ -33,7 +41,7 @@ Changes in module 'nf-core/mageck/mle' cat <<-END_VERSIONS > versions.yml "${task.process}": -@@ -49,6 +50,5 @@ +@@ -49,6 +54,5 @@ mageck: \$(mageck -v) END_VERSIONS """ @@ -42,4 +50,7 @@ Changes in module 'nf-core/mageck/mle' -} + +} +'modules/nf-core/mageck/mle/tests/main.nf.test.snap' is unchanged +'modules/nf-core/mageck/mle/tests/tags.yml' is unchanged +'modules/nf-core/mageck/mle/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/mageck/mle/main.nf b/modules/nf-core/mageck/mle/main.nf index fac59c23..a7451249 100644 --- a/modules/nf-core/mageck/mle/main.nf +++ b/modules/nf-core/mageck/mle/main.nf @@ -9,6 +9,7 @@ process MAGECK_MLE { input: tuple val(meta), path(design_matrix), path(count_table) + path(mle_control_sgrna) output: tuple val(meta), path("*.gene_summary.txt") , emit: gene_summary @@ -20,13 +21,16 @@ process MAGECK_MLE { script: def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' prefix = meta.id ?: "${meta.treatment}_vs_${meta.reference}" def design_command = design_matrix ? "-d $design_matrix" : '' + def control_sgrna = mle_control_sgrna ? 
"--control-sgrna $mle_control_sgrna" : '' """ mageck \\ mle \\ $args \\ + $control_sgrna \\ --threads $task.cpus \\ -k $count_table \\ -n $prefix \\ diff --git a/nextflow.config b/nextflow.config index 50ca1d2e..13d5005c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { library = null crisprcleanr = null contrasts = null + mle_control_sgrna = null mle_design_matrix = null count_table = null fasta = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 00042245..f06d7f35 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -191,6 +191,10 @@ "exists": true, "description": "Design matrix used for MAGeCK MLE to call essential genes under multiple conditions while considering sgRNA knockout efficiency" }, + "mle_control_sgrna": { + "type": "string", + "description": "control-sgrna file for MAGeCK MLE" + }, "contrasts": { "type": "string", "format": "file-path", diff --git a/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf b/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf index a95ef92f..374973d0 100644 --- a/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_crisprseq_pipeline/main.nf @@ -168,16 +168,24 @@ workflow INITIALISATION_CHANNEL_CREATION_SCREENING { ch_design = Channel.fromPath(params.mle_design_matrix) } + ch_biogrid = Channel.fromPath("$projectDir/assets/biogrid_hgncid_noduplicate_dropna.csv", checkIfExists: true) ch_hgnc = Channel.fromPath("$projectDir/assets/hgnc_complete_set.txt", checkIfExists: true) + if(params.mle_control_sgrna) { + ch_mle_control_sgrna = Channel.fromPath(params.mle_control_sgrna) + } else { + ch_mle_control_sgrna = [] + } emit: - library = ch_library // channel: library file - crisprcleanr = ch_crisprcleanr // channel: crisprcleanr file or value - design = ch_design // channel: design matrix file + library = ch_library // channel: library file + crisprcleanr = ch_crisprcleanr // channel: crisprcleanr file or value + 
design = ch_design // channel: design matrix file + mle_control_sgrna = ch_mle_control_sgrna // channel: negative control sgRNA for MAGeCK MLE biogrid = ch_biogrid // channel: biogrid hgnc = ch_hgnc // channel: hgnc + } /* diff --git a/workflows/crisprseq_screening.nf b/workflows/crisprseq_screening.nf index b25504ae..af363d0f 100644 --- a/workflows/crisprseq_screening.nf +++ b/workflows/crisprseq_screening.nf @@ -271,7 +271,7 @@ workflow CRISPRSEQ_SCREENING { }.set { ch_designed_mle } ch_mle = ch_designed_mle.combine(ch_counts) - MAGECK_MLE_MATRIX (ch_mle) + MAGECK_MLE_MATRIX (ch_mle, INITIALISATION_CHANNEL_CREATION_SCREENING.out.mle_control_sgrna) ch_versions = ch_versions.mix(MAGECK_MLE_MATRIX.out.versions) MAGECK_FLUTEMLE(MAGECK_MLE_MATRIX.out.gene_summary) ch_versions = ch_versions.mix(MAGECK_FLUTEMLE.out.versions) @@ -280,7 +280,7 @@ workflow CRISPRSEQ_SCREENING { if(params.contrasts) { MATRICESCREATION(ch_contrasts) ch_mle = MATRICESCREATION.out.design_matrix.combine(ch_counts) - MAGECK_MLE (ch_mle) + MAGECK_MLE (ch_mle, INITIALISATION_CHANNEL_CREATION_SCREENING.out.mle_control_sgrna) ch_versions = ch_versions.mix(MAGECK_MLE.out.versions) MAGECK_FLUTEMLE_CONTRASTS(MAGECK_MLE.out.gene_summary) ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_CONTRASTS.out.versions) @@ -290,7 +290,7 @@ workflow CRISPRSEQ_SCREENING { } if(params.day0_label) { ch_mle = Channel.of([id: "day0"]).merge(Channel.of([[]])).merge(ch_counts) - MAGECK_MLE_DAY0 (ch_mle) + MAGECK_MLE_DAY0 (ch_mle, INITIALISATION_CHANNEL_CREATION_SCREENING.out.mle_control_sgrna) ch_versions = ch_versions.mix(MAGECK_MLE_DAY0.out.versions) MAGECK_FLUTEMLE_DAY0(MAGECK_MLE_DAY0.out.gene_summary) ch_versions = ch_versions.mix(MAGECK_FLUTEMLE_DAY0.out.versions)