From c29d5dd40c27bde786682959d5359b2687111c5d Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Mon, 14 Aug 2023 16:00:55 +0200 Subject: [PATCH 01/18] Adding Genrich peak calling --- .../merged_library_frip_score_header.txt | 2 +- ...ibrary_joint_genrich_frip_score_header.txt | 14 + ...y_joint_genrich_peak_annotation_header.txt | 9 + ...ibrary_joint_genrich_peak_count_header.txt | 10 + .../merged_library_peak_annotation_header.txt | 2 +- .../merged_library_peak_count_header.txt | 2 +- ..._library_sep_genrich_frip_score_header.txt | 14 + ...ary_sep_genrich_peak_annotation_header.txt | 9 + ..._library_sep_genrich_peak_count_header.txt | 10 + .../merged_replicate_frip_score_header.txt | 2 +- ...erged_replicate_peak_annotation_header.txt | 2 +- .../merged_replicate_peak_count_header.txt | 2 +- assets/multiqc_config.yml | 14 +- bin/plot_genrich_qc.r | 155 ++++++++++ conf/base.config | 9 +- conf/modules.config | 274 +++++++++++++++++- modules.json | 5 + modules/local/genome_blacklist_regions.nf | 14 +- modules/local/homer_detailed_ann.nf | 45 +++ modules/local/igv.nf | 12 + modules/local/multiqc.nf | 8 + modules/local/plot_genrich_qc.nf | 35 +++ modules/nf-core/bowtie2/align/main.nf | 2 +- modules/nf-core/genrich/main.nf | 69 +++++ modules/nf-core/genrich/meta.yml | 80 +++++ nextflow.config | 17 +- nextflow_schema.json | 44 +++ subworkflows/local/bam_nofilter_bamtools.nf | 66 +++++ ...am_peaks_call_qc_annotate_genrich_homer.nf | 201 +++++++++++++ subworkflows/local/prepare_genome.nf | 7 +- workflows/atacseq.nf | 266 +++++++++++++---- 31 files changed, 1319 insertions(+), 82 deletions(-) create mode 100644 assets/multiqc/merged_library_joint_genrich_frip_score_header.txt create mode 100644 assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt create mode 100644 assets/multiqc/merged_library_joint_genrich_peak_count_header.txt create mode 100644 assets/multiqc/merged_library_sep_genrich_frip_score_header.txt create mode 100644 assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt create mode 100644 assets/multiqc/merged_library_sep_genrich_peak_count_header.txt create mode 100755 bin/plot_genrich_qc.r create mode 100644 modules/local/homer_detailed_ann.nf create mode 100644 modules/local/plot_genrich_qc.nf create mode 100644 modules/nf-core/genrich/main.nf create mode 100644 modules/nf-core/genrich/meta.yml create mode 100644 subworkflows/local/bam_nofilter_bamtools.nf create mode 100644 subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf diff --git a/assets/multiqc/merged_library_frip_score_header.txt b/assets/multiqc/merged_library_frip_score_header.txt index f89a75f6..866af81d 100644 --- a/assets/multiqc/merged_library_frip_score_header.txt +++ b/assets/multiqc/merged_library_frip_score_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_frip_score' -#section_name: 'MERGED LIB: MACS2 peak FRiP score' +#section_name: 'MERGED LIB: MACS2: Peak FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall # into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." diff --git a/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt new file mode 100644 index 00000000..6c5b15bc --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt @@ -0,0 +1,14 @@ +#id: 'mlib_joint_genrich_frip_score' +#section_name: 'MERGED LIB: Genrich (joint): Peak FRiP score' +#description: "is generated by calculating the fraction of all mapped reads that fall +# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted. +# See FRiP score." +#plot_type: 'bargraph' +#anchor: 'mlib_frip_score' +#pconfig: +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt new file mode 100644 index 00000000..e4b361d3 --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt @@ -0,0 +1,9 @@ +#id: 'mlib_joint_genrich_peak_annotation' +#section_name: 'MERGED LIB: Genrich (joint): HOMER peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# HOMER annotatePeaks.pl." +#plot_type: 'bargraph' +#anchor: 'mlib_peak_annotation' +#pconfig: +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt new file mode 100644 index 00000000..1d5e014c --- /dev/null +++ b/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt @@ -0,0 +1,10 @@ +#id: 'mlib_joint_genrich_peak_count' +#section_name: 'MERGED LIB: Genrich (joint): Peak count' +#description: "is calculated from total number of peaks called by +# Genrich" +#plot_type: 'bargraph' +#anchor: 'mlib_peak_count' +#pconfig: +# title: 'Total peak count' +# ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc/merged_library_peak_annotation_header.txt b/assets/multiqc/merged_library_peak_annotation_header.txt index 6f668c31..f3bc0ac9 100644 --- a/assets/multiqc/merged_library_peak_annotation_header.txt +++ b/assets/multiqc/merged_library_peak_annotation_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_peak_annotation' -#section_name: 'MERGED LIB: HOMER peak annotation' +#section_name: 'MERGED LIB: MACS2: HOMER peak annotation' #description: "is generated by calculating the proportion of peaks assigned to genomic features by # HOMER annotatePeaks.pl." #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_library_peak_count_header.txt b/assets/multiqc/merged_library_peak_count_header.txt index 60f5745c..b6075a03 100644 --- a/assets/multiqc/merged_library_peak_count_header.txt +++ b/assets/multiqc/merged_library_peak_count_header.txt @@ -1,5 +1,5 @@ #id: 'mlib_peak_count' -#section_name: 'MERGED LIB: MACS2 peak count' +#section_name: 'MERGED LIB: MACS2: Peak count' #description: "is calculated from total number of peaks called by # MACS2" #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt new file mode 100644 index 00000000..534c2373 --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt @@ -0,0 +1,14 @@ +#id: 'mlib_sep_genrich_frip_score' +#section_name: 'MERGED LIB: Genrich (sep): Peak FRiP score' +#description: "is generated by calculating the fraction of all mapped reads that fall +# into the Genrich called peak regions. A read must overlap a peak by at least 20% to be counted. +# See FRiP score." +#plot_type: 'bargraph' +#anchor: 'mlib_frip_score' +#pconfig: +# title: 'FRiP score' +# ylab: 'FRiP score' +# ymax: 1 +# ymin: 0 +# tt_decimals: 2 +# cpswitch: False diff --git a/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt new file mode 100644 index 00000000..911e3343 --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt @@ -0,0 +1,9 @@ +#id: 'mlib_sep_genrich_peak_annotation' +#section_name: 'MERGED LIB: Genrich (sep): HOMER peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# HOMER annotatePeaks.pl." +#plot_type: 'bargraph' +#anchor: 'mlib_peak_annotation' +#pconfig: +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt new file mode 100644 index 00000000..4cac61aa --- /dev/null +++ b/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt @@ -0,0 +1,10 @@ +#id: 'mlib_sep_genrich_peak_count' +#section_name: 'MERGED LIB: Genrich (sep): Peak count' +#description: "is calculated from total number of peaks called by +# Genrich" +#plot_type: 'bargraph' +#anchor: 'mlib_peak_count' +#pconfig: +# title: 'Total peak count' +# ylab: 'Peak count' +# cpswitch: False diff --git a/assets/multiqc/merged_replicate_frip_score_header.txt b/assets/multiqc/merged_replicate_frip_score_header.txt index c0f25a5b..5a17b63f 100644 --- a/assets/multiqc/merged_replicate_frip_score_header.txt +++ b/assets/multiqc/merged_replicate_frip_score_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_frip_score' -#section_name: 'MERGED REP: MACS2 peak FRiP score' +#section_name: 'MERGED REP: MACS: Peak FRiP score' #description: "is generated by calculating the fraction of all mapped reads that fall # into the MACS2 called peak regions. A read must overlap a peak by at least 20% to be counted. # See FRiP score." diff --git a/assets/multiqc/merged_replicate_peak_annotation_header.txt b/assets/multiqc/merged_replicate_peak_annotation_header.txt index d9d0eb03..e9e7a155 100644 --- a/assets/multiqc/merged_replicate_peak_annotation_header.txt +++ b/assets/multiqc/merged_replicate_peak_annotation_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_peak_annotation' -#section_name: 'MERGED REP: HOMER peak annotation' +#section_name: 'MERGED REP: MACS2: HOMER peak annotation' #description: "is generated by calculating the proportion of peaks assigned to genomic features by # HOMER annotatePeaks.pl." #plot_type: 'bargraph' diff --git a/assets/multiqc/merged_replicate_peak_count_header.txt b/assets/multiqc/merged_replicate_peak_count_header.txt index e4118505..e1c7bafc 100644 --- a/assets/multiqc/merged_replicate_peak_count_header.txt +++ b/assets/multiqc/merged_replicate_peak_count_header.txt @@ -1,5 +1,5 @@ #id: 'mrep_peak_count' -#section_name: 'MERGED REP: MACS2 Peak count' +#section_name: 'MERGED REP: MACS2: Peak count' #description: "is calculated from total number of peaks called by # MACS2" #plot_type: 'bargraph' diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index f902256a..a2d9c29a 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -91,8 +91,14 @@ module_order: - "./macs2/merged_replicate/featurecounts/*.summary" report_section_order: - mlib_peak_count: + mlib_sep_genrich_peak_count: before: mlib_deeptools + mlib_sep_genrich_frip_score: + before: mlib_sep_genrich_peak_count + mlib_sep_genrich_peak_annotation: + before: mlib_sep_genrich_frip_score + mlib_peak_count: + before: mlib_sep_genrich_peak_annotation mlib_frip_score: before: mlib_peak_count mlib_peak_annotation: @@ -115,6 +121,12 @@ report_section_order: before: mrep_featurecounts mrep_deseq2_clustering_1: before: mrep_deseq2_pca_1 + mlib_joint_genrich_peak_count: + before: mrep_deseq2_clustering_1 + mlib_joint_genrich_frip_score: + before: mlib_joint_genrich_peak_count + mlib_joint_genrich_peak_annotation: + before: mlib_joint_genrich_frip_score "nf-core-atacseq-methods-description": order: -1000 software_versions: diff --git a/bin/plot_genrich_qc.r b/bin/plot_genrich_qc.r new file mode 100755 index 00000000..c8bd38b9 --- /dev/null +++ b/bin/plot_genrich_qc.r @@ -0,0 +1,155 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(ggplot2) +library(reshape2) +library(scales) + +################################################ +################################################ +## PARSE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), + make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"), + make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), + make_option(c("-p", "--outprefix"), type="character", default='genrich_peakqc', help="Output prefix", metavar="string")) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +if (is.null(opt$peak_files)){ + print_help(opt_parser) + stop("At least one peak file must be supplied", call.=FALSE) +} +if (is.null(opt$sample_ids)){ + print_help(opt_parser) + stop("Please provide sample ids associated with peak files.", call.=FALSE) +} + +if (file.exists(opt$outdir) == FALSE) { + dir.create(opt$outdir,recursive=TRUE) +} + +PeakFiles <- unlist(strsplit(opt$peak_files,",")) +SampleIDs <- unlist(strsplit(opt$sample_ids,",")) +if (length(PeakFiles) != length(SampleIDs)) { + print_help(opt_parser) + stop("Number of sample ids must equal number of homer annotated files.", call.=FALSE) +} + +################################################ +################################################ +## READ IN DATA ## +################################################ +################################################ + +plot.dat <- data.frame() +summary.dat <- data.frame() +for (idx in 1:length(PeakFiles)) { + + sampleid = SampleIDs[idx] + isNarrow <- FALSE + header <- c("chrom","start","end","name","pileup", "strand", "fold", "-log10(pvalue)","-log10(qvalue)") + fsplit <- unlist(strsplit(basename(PeakFiles[idx]), split='.',fixed=TRUE)) + if (fsplit[length(fsplit)] == 'narrowPeak') { + isNarrow <- TRUE + header <- c(header,"summit") + } + peaks <- read.table(PeakFiles[idx], sep="\t", header=FALSE) + colnames(peaks) <- header + + ## GET SUMMARY STATISTICS + peaks.dat <- peaks[,c('fold','-log10(qvalue)','-log10(pvalue)')] + peaks.dat$length <- (peaks$end - peaks$start) + for (cname in colnames(peaks.dat)) { + sdat <- summary(peaks.dat[,cname]) + sdat["num_peaks"] <- nrow(peaks.dat) + sdat["measure"] <- cname + sdat["sample"] <- sampleid + sdat <- t(data.frame(x=matrix(sdat),row.names=names(sdat))) + summary.dat <- rbind(summary.dat,sdat) + } + colnames(peaks.dat) <- c('fold','fdr','pvalue','length') + peaks.dat$name <- rep(sampleid,nrow(peaks.dat)) + plot.dat <- rbind(plot.dat,peaks.dat) +} +plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) + +SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep="")) +write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE) + +################################################ +################################################ +## PLOTS ## +################################################ +################################################ + +## RETURNS VIOLIN PLOT OBJECT +violin.plot <- function(plot.dat,x,y,ylab,title,log) { + + plot <- ggplot(plot.dat, aes_string(x=x, y=y)) + + geom_violin(aes_string(colour=x,fill=x), alpha = 0.3) + + geom_boxplot(width=0.1) + + xlab("") + + ylab(ylab) + + ggtitle(title) + + theme(legend.position="none", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + if (log == 10) { + plot <- plot + scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + } + if (log == 2) { + plot <- plot + scale_y_continuous(trans='log2',breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) + } + return(plot) +} + +############################ + +PlotFile <- file.path(opt$outdir,paste(opt$outprefix,".plots.pdf",sep="")) +pdf(PlotFile,height=6,width=3*length(unique(plot.dat$name))) + +## PEAK COUNT PLOT +peak.count.dat <- as.data.frame(table(plot.dat$name)) +colnames(peak.count.dat) <- c("name","count") +plot <- ggplot(peak.count.dat, aes(x=name, y=count)) + + geom_bar(stat="identity",aes(colour=name,fill=name), position = "dodge", width = 0.8, alpha = 0.3) + + xlab("") + + ylab("Number of peaks") + + ggtitle("Peak count") + + theme(legend.position="none", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + + geom_text(aes(label = count, x = name, y = count), position = position_dodge(width = 0.8), vjust = -0.6) +print(plot) + +## VIOLIN PLOTS +print(violin.plot(plot.dat=plot.dat,x="name",y="length",ylab=expression(log[10]*" peak length"),title="Peak length distribution",log=10)) +print(violin.plot(plot.dat=plot.dat,x="name",y="fold",ylab=expression(log[2]*" fold-enrichment"),title="Fold-change distribution",log=2)) +print(violin.plot(plot.dat=plot.dat,x="name",y="fdr",ylab=expression(-log[10]*" qvalue"),title="FDR distribution",log=-1)) +print(violin.plot(plot.dat=plot.dat,x="name",y="pvalue",ylab=expression(-log[10]*" pvalue"),title="Pvalue distribution",log=-1)) +dev.off() + +################################################ +################################################ +################################################ +################################################ diff --git a/conf/base.config b/conf/base.config index f2ac8d7a..b27712d8 100755 --- a/conf/base.config +++ b/conf/base.config @@ -35,18 +35,23 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } + cpus = { check_max( 4 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } + cpus = { check_max( 8 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { time = { check_max( 20.h * task.attempt, 'time' ) } } + withLabel:process_ultra { + cpus = { check_max( 8 * task.attempt, 'cpus' ) } + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + time = { check_max( 72.h * task.attempt, 'time' ) } + } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } } diff --git a/conf/modules.config b/conf/modules.config index 2da820b5..bd6d4a01 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -261,7 +261,8 @@ if (params.aligner == 'bowtie2') { ext.args = { [ meta.read_group ? "--rg-id ${meta.id} --rg SM:${meta.id - ~/_T\d+$/} --rg PL:ILLUMINA --rg LB:${meta.id} --rg PU:1" : '', - params.seq_center ? "--rg CN:${params.seq_center}" : '' + params.seq_center ? "--rg CN:${params.seq_center}" : '', + params.analyze_multimappers != 0 ? "-k ${params.analyze_multimappers}" : '' ].join(' ').trim() } ext.prefix = { "${meta.id}.Lb" } @@ -395,7 +396,7 @@ process { [ meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001', params.keep_dups ? '' : '-F 0x0400', - params.keep_multi_map ? '' : '-q 1' + params.keep_multi_map ? '' : '-q 1 -F 4 -F 256' ].join(' ').trim() } ext.prefix = { meta.single_end ? "${meta.id}.mLb.clN.sorted" : "${meta.id}.mLb.flT.sorted" } @@ -590,7 +591,7 @@ if (!params.skip_plot_fingerprint) { } process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' { ext.args = [ '--keep-dup all', '--nomodel', @@ -607,7 +608,7 @@ process { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:FRIP_SCORE' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -615,7 +616,7 @@ process { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mLb.clN_peaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -627,7 +628,7 @@ process { if (!params.skip_peak_annotation) { process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.mLb.clN_peaks" } publishDir = [ @@ -640,7 +641,7 @@ if (!params.skip_peak_annotation) { if (!params.skip_peak_qc) { process { - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' { ext.args = '-o ./ -p macs2_peak.mLb.clN' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -649,7 +650,7 @@ if (!params.skip_peak_annotation) { ] } - withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' ext.prefix = 'macs2_annotatePeaks.mLb.clN' publishDir = [ @@ -719,6 +720,166 @@ if (!params.skip_peak_annotation) { } } +process { + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT' { + ext.args = '-n' + ext.prefix = { "${meta.id}.mLb.namesorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.{bam,bai}" + ] + } + + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:BEDTOOLS_GENOMECOV' { + ext.args = { (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' } + ext.prefix = { "${meta.id}.mLb" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ], + [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig/scale" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + ] + } + + withName: '.*:MERGED_LIBRARY_NF_BAM_TO_BIGWIG:UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}.mLb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ] + } +} + +process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ + '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (params.homer_detail_annotation) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_speak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_sPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } +} + if (!params.skip_ataqv) { process { withName: 'MERGED_LIBRARY_ATAQV_ATAQV' { @@ -816,7 +977,7 @@ if (!params.skip_merge_replicates) { } process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MACS2_CALLPEAK' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MACS2_CALLPEAK' { ext.args = [ '--keep-dup all', '--nomodel', @@ -831,7 +992,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:FRIP_SCORE' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:FRIP_SCORE' { ext.args = '-bed -c -f 0.20' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -839,7 +1000,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:MULTIQC_CUSTOM_PEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mRp.clN_peaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -851,7 +1012,7 @@ if (!params.skip_merge_replicates) { if (!params.skip_peak_annotation ) { process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:HOMER_ANNOTATEPEAKS' { ext.args = '-gid' ext.prefix = { "${meta.id}.mRp.clN_peaks" } publishDir = [ @@ -864,7 +1025,7 @@ if (!params.skip_merge_replicates) { if (!params.skip_peak_qc) { process { - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_MACS2_QC' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_MACS2_QC' { ext.args = '-o ./ -p macs2_peak.mRp.clN' publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_replicate/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, @@ -873,7 +1034,7 @@ if (!params.skip_merge_replicates) { ] } - withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS:PLOT_HOMER_ANNOTATEPEAKS' { + withName: '.*:MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2:PLOT_HOMER_ANNOTATEPEAKS' { ext.args = '-o ./' ext.prefix = 'macs2_annotatePeaks.mRp.clN' publishDir = [ @@ -943,6 +1104,91 @@ if (!params.skip_merge_replicates) { } } + + +process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (params.homer_detail_annotation) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_jpeak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_jPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + if (!params.skip_igv) { process { withName: 'IGV' { diff --git a/modules.json b/modules.json index 962c8dc3..c2358971 100644 --- a/modules.json +++ b/modules.json @@ -80,6 +80,11 @@ "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", "installed_by": ["fastq_fastqc_umitools_trimgalore"] }, + "genrich": { + "branch": "master", + "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "installed_by": ["modules"] + }, "gffread": { "branch": "master", "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf index 74a7936f..b8740ed4 100644 --- a/modules/local/genome_blacklist_regions.nf +++ b/modules/local/genome_blacklist_regions.nf @@ -13,19 +13,22 @@ process GENOME_BLACKLIST_REGIONS { val keep_mito output: - path '*.bed' , emit: bed - path "versions.yml", emit: versions + path '*.whitelist_regions.bed', emit: whitelist_bed + path '*.blacklist_regions.bed', emit: blacklist_bed + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when script: - def file_out = "${sizes.simpleName}.include_regions.bed" + def whitelist = "${sizes.simpleName}.whitelist_regions.bed" + def blacklist_out = "${sizes.simpleName}.blacklist_regions.bed" def name_filter = mito_name ? "| awk '\$1 !~ /${params.mito_name}/ {print \$0}'": '' def mito_filter = keep_mito ? '' : name_filter if (blacklist) { """ - sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $file_out + sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes $mito_filter > $whitelist + complementBed -i $whitelist -g $sizes > $blacklist_out cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -34,7 +37,8 @@ process GENOME_BLACKLIST_REGIONS { """ } else { """ - awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $file_out + awk '{print \$1, '0' , \$2}' OFS='\t' $sizes $mito_filter > $whitelist + complementBed -i $whitelist -g $sizes > $blacklist_out cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/homer_detailed_ann.nf b/modules/local/homer_detailed_ann.nf new file mode 100644 index 00000000..5d9ae345 --- /dev/null +++ b/modules/local/homer_detailed_ann.nf @@ -0,0 +1,45 @@ +process HOMER_DETAIL_ANNOTATEPEAKS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + // configureHomer.pl does not work with docker or singularity, so we use conda (see: https://github.com/bioconda/bioconda-recipes/blob/master/recipes/homer/README.txt) + conda "bioconda::homer=4.11" + + input: + tuple val(meta), path(peak) + val genome + + output: + tuple val(meta), path("*annotatePeaks.detailed.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + installed=\$(perl \$CONDA_PREFIX/share/homer*/configureHomer.pl -list) + line=\$(echo "\$installed" | grep "$genome") + yes_or_no=\${line:0:1} + + if [ \$yes_or_no != "+" ]; then + perl \$CONDA_PREFIX/share/homer*/configureHomer.pl -install $genome + fi + + annotatePeaks.pl \\ + $peak \\ + $genome \\ + $args \\ + -cpu $task.cpus \\ + > ${prefix}.annotatePeaks.detailed.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf index d060d0e7..be2cf8f4 100644 --- a/modules/local/igv.nf +++ b/modules/local/igv.nf @@ -14,6 +14,11 @@ process IGV { path ("${bigwig_replicate_publish_dir}/*") path ("${peak_replicate_publish_dir}/*") path ("${consensus_replicate_publish_dir}/*") + + path ("${bigwig_library_nf_publish_dir}/*") + path ("${peak_library_s_genrich_publish_dir}/*") + path ("${peak_library_j_genrich_publish_dir}/*") + val bigwig_library_publish_dir val peak_library_publish_dir val consensus_library_publish_dir @@ -21,6 +26,10 @@ process IGV { val peak_replicate_publish_dir val consensus_replicate_publish_dir + val bigwig_library_nf_publish_dir + val peak_library_s_genrich_publish_dir + val peak_library_j_genrich_publish_dir + output: // Publish fasta file while copyTo fails when the source and destination buckets are in different regions path "*files.txt" , emit: txt @@ -40,6 +49,9 @@ process IGV { find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_replicate_publish_dir" || test \$? = 1; } > mRp_bigwig.igv.txt find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_replicate_publish_dir" || test \$? = 1; } > mRp_peaks.igv.txt find * -type l -name "*.bed" -exec echo -e ""{}"\\t0,0,0" \\; | { grep "^$consensus_replicate_publish_dir" || test \$? = 1; } > mRp_bed.igv.txt + find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$bigwig_library_nf_publish_dir" || test \$? = 1; } > mLb_nf_bigwig.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_s_genrich_publish_dir" || test \$? = 1; } > mLb_s_genrich_peaks.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$peak_library_j_genrich_publish_dir" || test \$? = 1; } > mLb_j_genrich_peaks.igv.txt cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../' diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf index 6064fd7d..676332dd 100644 --- a/modules/local/multiqc.nf +++ b/modules/local/multiqc.nf @@ -40,6 +40,10 @@ process MULTIQC { path ('macs2/merged_library/annotation/*') path ('macs2/merged_library/featurecounts/*') + path ('genrich/merged_library/sep/peaks/*') + path ('genrich/merged_library/sep/peaks/*') + path ('genrich/merged_library/sep/annotation/*') + path ('alignment/merged_replicate/*') path ('alignment/merged_replicate/*') path ('alignment/merged_replicate/*') @@ -55,6 +59,10 @@ process MULTIQC { path ('deseq2_replicate/*') path ('deseq2_replicate/*') + path ('genrich/merged_library/joint/peaks/*') + path ('genrich/merged_library/joint/peaks/*') + path ('genrich/merged_library/joint/annotation/*') + output: path "*multiqc_report.html", emit: report path "*_data" , emit: data diff --git a/modules/local/plot_genrich_qc.nf b/modules/local/plot_genrich_qc.nf new file mode 100644 index 00000000..a0d5ae98 --- /dev/null +++ b/modules/local/plot_genrich_qc.nf @@ -0,0 +1,35 @@ +process PLOT_GENRICH_QC { + label 'process_medium' + + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': + 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path peaks + val is_narrow_peak + + output: + path '*.txt' , emit: txt + path '*.pdf' , emit: pdf + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ + def args = task.ext.args ?: '' + def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' + """ + plot_genrich_qc.r \\ + -i ${peaks.join(',')} \\ + -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index a77114d2..74fcc7c7 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -1,6 +1,6 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label "process_high" + label "process_ultra" conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/genrich/main.nf b/modules/nf-core/genrich/main.nf new file mode 100644 index 00000000..69a6bd8c --- /dev/null +++ b/modules/nf-core/genrich/main.nf @@ -0,0 +1,69 @@ +process GENRICH { + tag "$meta.id" + label 'process_high' + + conda "bioconda::genrich=0.6.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/genrich:0.6.1--h5bf99c6_1' : + 'biocontainers/genrich:0.6.1--h5bf99c6_1' }" + + input: + tuple val(meta), path(treatment_bam), path(control_bam) + path blacklist_bed + val save_pvalues + val save_pileup + val save_bed + val save_duplicates + + output: + tuple val(meta), path("*.narrowPeak") , emit: peak + tuple val(meta), path("*pvalues.bedGraph"), optional:true, emit: bedgraph_pvalues + tuple val(meta), path("*pileup.bedGraph"), optional:true, emit: bedgraph_pileup + tuple val(meta), path("*intervals.bed"), optional:true, emit: bed_intervals + tuple val(meta), path("*duplicates.txt"), optional:true, emit: duplicates + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def layout = (!args.contains("-y") && meta.single_end) ? "-y" : "" + def treatment = treatment_bam ? "-t ${treatment_bam.sort().join(',')}" : "" + def control = control_bam ? "-c ${control_bam.sort().join(',')}" : "" + def blacklist = blacklist_bed ? "-E $blacklist_bed" : "" + def pvalues = save_pvalues ? "-f ${prefix}.pvalues.bedGraph" : "" + def pileup = save_pileup ? "-k ${prefix}.pileup.bedGraph" : "" + def bed = save_bed ? "-b ${prefix}.intervals.bed" : "" + def duplicates = "" + + if (save_duplicates) { + if (args.contains('-r')) { + duplicates = "-R ${prefix}.duplicates.txt" + } else { + log.info '[Genrich] Duplicates can only be saved if they are filtered, defaulting to -r option (Remove PCR duplicates).' + duplicates = "-r -R ${prefix}.duplicates.txt" + } + } + + """ + Genrich \\ + $treatment \\ + $args \\ + $layout \\ + $blacklist \\ + -o ${prefix}.narrowPeak \\ + $pvalues \\ + $pileup \\ + $bed \\ + $duplicates \\ + $control + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + genrich: \$(echo \$(Genrich --version 2>&1) | sed 's/^Genrich, version //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/genrich/meta.yml b/modules/nf-core/genrich/meta.yml new file mode 100644 index 00000000..414821f6 --- /dev/null +++ b/modules/nf-core/genrich/meta.yml @@ -0,0 +1,80 @@ +name: genrich +description: Peak-calling for ChIP-seq and ATAC-seq enrichment experiments +keywords: + - peak-calling + - ChIP-seq + - ATAC-seq +tools: + - genrich: + description: | + Genrich is a peak-caller for genomic enrichment assays (e.g. ChIP-seq, ATAC-seq). + It analyzes alignment files generated following the assay and produces a file + detailing peaks of significant enrichment. + homepage: https://github.com/jsh58/Genrich + documentation: https://github.com/jsh58/Genrich#readme + tool_dev_url: https://github.com/jsh58/Genrich + + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - treatment_bam: + type: file + description: Coordinate sorted BAM/SAM file from treatment sample or list of BAM/SAM files from biological replicates + pattern: "*.{bam,sam}" + - control_bam: + type: file + description: Coordinate sorted BAM/SAM file from control sample or list of BAM/SAM files from control samples + pattern: "*.{bam,sam}" + - blacklist_bed: + type: file + description: Bed file containing genomic intervals to exclude from the analysis + pattern: "*.{bed}" + - save_pvalues: + type: boolean + description: Create bedgraph-ish file for p/q-values file + - save_pileup: + type: boolean + description: Create bedgraph-ish file for pileups and p-values + - save_bed: + type: boolean + description: Create BED file for reads/fragments/intervals + - save_duplicates: + type: boolean + description: Create PCR duplicates file (only works if -r option is set) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - peaks: + type: file + description: Output file is in ENCODE narrowPeak format + pattern: "*.{narrowPeak}" + - bedgraph_pvalues: + type: file + description: bedGraph file containing p/q values + pattern: "*.{pvalues.bedGraph}" + - bedgraph_pileup: + type: file + description: bedGraph file containing pileups and p-values + pattern: "*.{pileup.bedGraph}" + - bed_intervals: + type: file + description: Bed file containing annotated intervals + pattern: "*.{intervals.bed}" + - duplicates: + type: file + description: Text output file containing intervals corresponding to PCR duplicates + pattern: "*.{intervals.txt}" + - version: + type: file + description: File containing software version + pattern: "*.{version.txt}" +authors: + - "@JoseEspinosa" + - "@samuelruizperez" diff --git a/nextflow.config b/nextflow.config index 9f60b156..007036aa 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,18 +43,31 @@ params { skip_merge_replicates = false save_align_intermeds = false save_unaligned = false + analyze_multimappers = 0 + // Options: Peaks narrow_peak = false + skip_peak_qc = false + skip_peak_annotation = false + + // Options: MACS2 broad_cutoff = 0.1 macs_fdr = null macs_pvalue = null min_reps_consensus = 1 save_macs_pileup = false - skip_peak_qc = false - skip_peak_annotation = false skip_consensus_peaks = false + // Options: Genrich + genrich_qvalue = 0.01 + genrich_pvalue = 0.01 + save_genrich_pileup = true + save_genrich_pvalues = true + save_genrich_bed = true + save_genrich_duplicates = true + homer_detail_annotation = false + // Options: DESeq2 QC deseq2_vst = true skip_deseq2_qc = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 2e384c9e..448713b8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -325,6 +325,13 @@ "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", "fa_icon": "fas fa-cog" + }, + "analyze_multimappers": { + "type": "integer", + "default": 0, + "description": "Output this number of multimapping reads from the alignment step and include them in peak calling.", + "help_text": "This will include non-unique reads in the alignment (Bowtie2 and STAR) and peak calling steps (Genrich). Default of 0 means only the best alignment is use and multimappers are ignored.", + "fa_icon": "fas fa-chart-bar" } } }, @@ -382,6 +389,43 @@ "type": "boolean", "description": "Skip consensus peak generation, annotation and counting.", "fa_icon": "fas fa-fast-forward" + }, + "save_genrich_pileup": { + "type": "boolean", + "description": "Instruct Genrich to create bedGraph files normalised to signal per million reads.", + "fa_icon": "fas fa-save" + }, + "save_genrich_pvalues": { + "type": "boolean", + "description": "Instruct Genrich to create bedGraph files with p-values.", + "fa_icon": "fas fa-save" + }, + "save_genrich_bed": { + "type": "boolean", + "description": "Instruct Genrich to create BED files.", + "fa_icon": "fas fa-save" + }, + "save_genrich_duplicates": { + "type": "boolean", + "description": "Instruct Genrich to save duplicates.", + "fa_icon": "fas fa-save" + }, + "genrich_qvalue": { + "type": "number", + "default": 0.1, + "description": "Minimum q-value cutoff for peak detection.", + "fa_icon": "fas fa-sort-amount-down" + }, + "genrich_pvalue": { + "type": "number", + "default": 0.1, + "description": "Minimum p-value cutoff for peak detection.", + "fa_icon": "fas fa-sort-amount-down" + }, + "homer_detail_annotation": { + "type": "boolean", + "description": "Annotate peaks with HOMER in detailed mode.", + "fa_icon": "fas fa-search-plus" } } }, diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf new file mode 100644 index 00000000..9c279195 --- /dev/null +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -0,0 +1,66 @@ +include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' + +workflow BAM_NOFILTER_BAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ fasta ] + + main: + + ch_versions = Channel.empty() + + ch_bam + .branch { + meta, bam -> + single_end: meta.single_end + return [ meta, bam ] + paired_end: !meta.single_end + return [ meta, bam ] + } + .set { ch_bam_b } + + // + // Index SE BAM file + // + SAMTOOLS_INDEX { + ch_bam_b.single_end + } + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + // + // Run samtools stats, flagstat and idxstats on SE BAM + // + BAM_STATS_SAMTOOLS ( + ch_bam_b.single_end.join(SAMTOOLS_INDEX.out.bai), + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) + + // + // Name sort PE BAM before filtering with pysam + // + SAMTOOLS_SORT ( + ch_bam_b.paired_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + // + // Sort, index PE BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + SAMTOOLS_SORT.out.bam, + ch_fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) + + emit: + name_bam = SAMTOOLS_SORT.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats.mix(BAM_STATS_SAMTOOLS.out.idxstats) // channel: [ val(meta), [ idxstats ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf new file mode 100644 index 00000000..90e8c105 --- /dev/null +++ b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf @@ -0,0 +1,201 @@ +// +// Call peaks with Genrich, annotate with HOMER and perform downstream QC +// + +include { GENRICH } from '../../modules/nf-core/genrich/main' +include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' +include { HOMER_DETAIL_ANNOTATEPEAKS } from '../../modules/local/homer_detailed_ann' + +include { FRIP_SCORE } from '../../modules/local/frip_score' +include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' + +include { PLOT_GENRICH_QC } from '../../modules/local/plot_genrich_qc' + +include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' + +workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { + take: + ch_bam // channel: [ val(meta), [ treatment_bam ], [ control_bam ] ] + ch_fasta // channel: [ fasta ] + ch_gtf // channel: [ gtf ] + genome // string: genome name + ch_blacklist_regions // channel: [ bed ] + annotate_peaks_suffix // string: suffix for input HOMER annotate peaks files to be trimmed off + ch_peak_count_header_multiqc // channel: [ header_file ] + ch_frip_score_multiqc // channel: [ header_file ] + ch_peak_annotation_header_multiqc // channel: [ header_file ] +// add broad_peak options + is_narrow_peak // boolean: true/false + skip_peak_annotation // boolean: true/false + skip_peak_qc // boolean: true/false + + save_pvalues // boolean: true/false + save_pileup // boolean: true/false + save_bed // boolean: true/false + save_duplicates // boolean: true/false + detailed_annotation // boolean: true/false + + + main: + + ch_versions = Channel.empty() + + // + // Call peaks with Genrich + // + GENRICH ( + ch_bam, + ch_blacklist_regions, + save_pvalues, + save_pileup, + save_bed, + save_duplicates + + ) + ch_versions = ch_versions.mix(GENRICH.out.versions.first()) + + // + // Filter out samples with 0 Genrich peaks called + // + GENRICH + .out + .peak + .filter { + meta, peaks -> + peaks.size() > 0 + } + .set { ch_genrich_peaks } + + + // Create channels: [ meta, ip_bam, peaks ] + + ch_bam + .join(ch_genrich_peaks, by: [0]) + .map { + meta, treatment_bam, control_bam, peaks -> + [ meta, treatment_bam, peaks ] + } + .set { ch_bam_peaks } + + // split ch_bam_peaks by treatment_bam + + ch_bam_peaks + .transpose() + .map { + meta, treatment_bam, peaks -> + def meta_clone = meta.clone() + meta_clone.id = treatment_bam.getSimpleName() + [ meta_clone, treatment_bam, peaks ] + } + .set { ch_bam_peaks_treatment_bam } + + // + // Calculate FRiP score + // + FRIP_SCORE ( + ch_bam_peaks_treatment_bam + ) + ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first()) + + // Create channels: [ meta, peaks, frip ] + ch_bam_peaks_treatment_bam + .join(FRIP_SCORE.out.txt, by: [0]) + .map { + meta, ip_bam, peaks, frip -> + [ meta, peaks, frip ] + } + .set { ch_bam_peak_frip } + + // + // FRiP score custom content for MultiQC + // + MULTIQC_CUSTOM_PEAKS ( + ch_bam_peak_frip, + ch_peak_count_header_multiqc, + ch_frip_score_multiqc + ) + ch_versions = ch_versions.mix(MULTIQC_CUSTOM_PEAKS.out.versions.first()) + + ch_homer_annotatepeaks = Channel.empty() + ch_homer_det_annotatepeaks = Channel.empty() + ch_plot_genrich_qc_txt = Channel.empty() + ch_plot_genrich_qc_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_txt = Channel.empty() + ch_plot_homer_annotatepeaks_pdf = Channel.empty() + ch_plot_homer_annotatepeaks_tsv = Channel.empty() + if (!skip_peak_annotation) { + // + // Annotate peaks with HOMER + // + HOMER_ANNOTATEPEAKS ( + ch_genrich_peaks, + ch_fasta, + ch_gtf + ) + ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt + ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) + + if (detailed_annotation) { + // + // Annotate peaks with HOMER (detailed) + // + if (genome) { + HOMER_DETAIL_ANNOTATEPEAKS ( + ch_genrich_peaks, + genome + ) + ch_homer_det_annotatepeaks = HOMER_DETAIL_ANNOTATEPEAKS.out.txt + ch_versions = ch_versions.mix(HOMER_DETAIL_ANNOTATEPEAKS.out.versions.first()) + } + } + + if (!skip_peak_qc) { + // + // Genrich QC plots with R + // + PLOT_GENRICH_QC ( + ch_genrich_peaks.collect{it[1]}, + is_narrow_peak + ) + ch_plot_genrich_qc_txt = PLOT_GENRICH_QC.out.txt + ch_plot_genrich_qc_pdf = PLOT_GENRICH_QC.out.pdf + ch_versions = ch_versions.mix(PLOT_GENRICH_QC.out.versions) + + // + // Peak annotation QC plots with R + // + PLOT_HOMER_ANNOTATEPEAKS ( + HOMER_ANNOTATEPEAKS.out.txt.collect{it[1]}, + ch_peak_annotation_header_multiqc, + annotate_peaks_suffix + ) + ch_plot_homer_annotatepeaks_txt = PLOT_HOMER_ANNOTATEPEAKS.out.txt + ch_plot_homer_annotatepeaks_pdf = PLOT_HOMER_ANNOTATEPEAKS.out.pdf + ch_plot_homer_annotatepeaks_tsv = PLOT_HOMER_ANNOTATEPEAKS.out.tsv + ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions) + } + } + + emit: + peaks = ch_genrich_peaks // channel: [ val(meta), [ peaks ] ] + bed_intervals = GENRICH.out.bed_intervals // channel: [ val(meta), [ bed ] ] + bedgraph_pileup = GENRICH.out.bedgraph_pileup // channel: [ val(meta), [ bedgraph ] ] + bedgraph_pvalues = GENRICH.out.bedgraph_pvalues // channel: [ val(meta), [ bedgraph ] ] + duplicates = GENRICH.out.duplicates // channel: [ val(meta), [ bedgraph ] ] + + frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] + + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] + peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] + + homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ] + homer_det_annotatepeaks = ch_homer_det_annotatepeaks // channel: [ val(meta), [ txt ] ] + plot_genrich_qc_txt = ch_plot_genrich_qc_txt // channel: [ txt ] + plot_genrich_qc_pdf = ch_plot_genrich_qc_pdf // channel: [ pdf ] + + plot_homer_annotatepeaks_txt = ch_plot_homer_annotatepeaks_txt // channel: [ txt ] + plot_homer_annotatepeaks_pdf = ch_plot_homer_annotatepeaks_pdf // channel: [ pdf ] + plot_homer_annotatepeaks_tsv = ch_plot_homer_annotatepeaks_tsv // channel: [ tsv ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index d6dbffcf..cd54d112 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -143,13 +143,15 @@ workflow PREPARE_GENOME { // Prepare genome intervals for filtering by removing regions in blacklist file // ch_genome_filtered_bed = Channel.empty() + ch_genome_blacklist_bed = Channel.empty() GENOME_BLACKLIST_REGIONS ( ch_chrom_sizes, ch_blacklist.ifEmpty([]), params.mito_name ?: '', params.keep_mito ) - ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed + ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.whitelist_bed + ch_genome_blacklist_bed = GENOME_BLACKLIST_REGIONS.out.blacklist_bed ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) // @@ -244,7 +246,8 @@ workflow PREPARE_GENOME { gene_bed = ch_gene_bed // path: gene.bed tss_bed = ch_tss_bed // path: tss.bed chrom_sizes = ch_chrom_sizes // path: genome.sizes - filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed + filtered_bed = ch_genome_filtered_bed // path: *.whitelist_regions.bed + blacklist_bed = ch_genome_blacklist_bed // path: *.blacklist_regions.bed bwa_index = ch_bwa_index // path: bwa/index/ bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ chromap_index = ch_chromap_index // path: genome.index diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index 5daf5839..3266ee2c 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -43,8 +43,19 @@ ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config) // Header files for MultiQC ch_multiqc_merged_library_peak_count_header = file("$projectDir/assets/multiqc/merged_library_peak_count_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_count_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_peak_count_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_count_header.txt", checkIfExists: true) + + ch_multiqc_merged_library_frip_score_header = file("$projectDir/assets/multiqc/merged_library_frip_score_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_frip_score_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_frip_score_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_frip_score_header.txt", checkIfExists: true) + ch_multiqc_merged_library_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_peak_annotation_header.txt", checkIfExists: true) +ch_multiqc_merged_library_sep_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_sep_genrich_peak_annotation_header.txt", checkIfExists: true) +ch_multiqc_merged_library_joint_genrich_peak_annotation_header = file("$projectDir/assets/multiqc/merged_library_joint_genrich_peak_annotation_header.txt", checkIfExists: true) + + ch_multiqc_merged_library_deseq2_pca_header = file("$projectDir/assets/multiqc/merged_library_deseq2_pca_header.txt", checkIfExists: true) ch_multiqc_merged_library_deseq2_clustering_header = file("$projectDir/assets/multiqc/merged_library_deseq2_clustering_header.txt", checkIfExists: true) @@ -71,14 +82,21 @@ include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' include { ALIGN_STAR } from '../subworkflows/local/align_star' include { BIGWIG_PLOT_DEEPTOOLS as MERGED_LIBRARY_BIGWIG_PLOT_DEEPTOOLS } from '../subworkflows/local/bigwig_plot_deeptools' include { BAM_FILTER_BAMTOOLS as MERGED_LIBRARY_FILTER_BAM } from '../subworkflows/local/bam_filter_bamtools' +include { BAM_NOFILTER_BAMTOOLS as MERGED_LIBRARY_NOFILTER_BAM } from '../subworkflows/local/bam_nofilter_bamtools' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' +include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_LIBRARY_NF_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC as MERGED_REPLICATE_BAM_TO_BIGWIG } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' -include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER as MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf' include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_LIBRARY_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 as MERGED_REPLICATE_CONSENSUS_PEAKS } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf' +include { BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER as MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH } from '../subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf' + + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS @@ -157,10 +175,20 @@ workflow ATACSEQ { ) ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_TRIMGALORE.out.versions) + // + // Check if analyze_multimappers is set + // + if (params.analyze_multimappers) { + if (params.aligner == 'bwa' || params.aligner == 'chromap') { + exit 1, 'Multimapper analysis is only supported for Bowtie2 or STAR so far.' + } + } + // // SUBWORKFLOW: Alignment with BWA & BAM QC // ch_genome_bam = Channel.empty() + ch_genome_bam_index = Channel.empty() ch_samtools_stats = Channel.empty() ch_samtools_flagstat = Channel.empty() ch_samtools_idxstats = Channel.empty() @@ -175,6 +203,7 @@ workflow ATACSEQ { } ) ch_genome_bam = FASTQ_ALIGN_BWA.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BWA.out.bai ch_samtools_stats = FASTQ_ALIGN_BWA.out.stats ch_samtools_flagstat = FASTQ_ALIGN_BWA.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_BWA.out.idxstats @@ -196,6 +225,7 @@ workflow ATACSEQ { } ) ch_genome_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_genome_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai ch_samtools_stats = FASTQ_ALIGN_BOWTIE2.out.stats ch_samtools_flagstat = FASTQ_ALIGN_BOWTIE2.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_BOWTIE2.out.idxstats @@ -219,6 +249,7 @@ workflow ATACSEQ { [] ) ch_genome_bam = FASTQ_ALIGN_CHROMAP.out.bam + ch_genome_bam_index = FASTQ_ALIGN_CHROMAP.out.bai ch_samtools_stats = FASTQ_ALIGN_CHROMAP.out.stats ch_samtools_flagstat = FASTQ_ALIGN_CHROMAP.out.flagstat ch_samtools_idxstats = FASTQ_ALIGN_CHROMAP.out.idxstats @@ -240,6 +271,7 @@ workflow ATACSEQ { params.seq_center ?: '' ) ch_genome_bam = ALIGN_STAR.out.bam + ch_genome_bam_index = ALIGN_STAR.out.bai ch_samtools_stats = ALIGN_STAR.out.stats ch_samtools_flagstat = ALIGN_STAR.out.flagstat ch_samtools_idxstats = ALIGN_STAR.out.idxstats @@ -293,17 +325,7 @@ workflow ATACSEQ { // SUBWORKFLOW: Filter BAM file // MERGED_LIBRARY_FILTER_BAM ( - MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - }, + MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bam.join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]), PREPARE_GENOME.out.filtered_bed.first(), PREPARE_GENOME .out @@ -384,16 +406,7 @@ workflow ATACSEQ { MERGED_LIBRARY_FILTER_BAM .out .bam - .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_FILTER_BAM.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } + .join(MERGED_LIBRARY_FILTER_BAM.out.bai, by: [0]) .set { ch_bam_bai } if (params.with_control) { @@ -446,7 +459,7 @@ workflow ATACSEQ { // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC // - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS ( + MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2 ( ch_bam_library, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -459,7 +472,12 @@ workflow ATACSEQ { params.skip_peak_annotation, params.skip_peak_qc ) - ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.versions) + ch_library_peaks = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peaks + ch_library_frip_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc + ch_library_peak_count_multiqc = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc + ch_library_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv + + ch_versions = ch_versions.mix(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS_MACS2.out.versions) // // SUBWORKFLOW: Consensus peaks analysis @@ -470,7 +488,7 @@ workflow ATACSEQ { ch_deseq2_clustering_library_multiqc = Channel.empty() if (!params.skip_consensus_peaks) { MERGED_LIBRARY_CONSENSUS_PEAKS ( - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks, + ch_library_peaks, ch_bam_library, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -487,21 +505,103 @@ workflow ATACSEQ { ch_versions = ch_versions.mix(MERGED_LIBRARY_CONSENSUS_PEAKS.out.versions) } + // + // SUBWORKFLOW: Sort by name and generate stats for unfiltered BAMs + // + MERGED_LIBRARY_NOFILTER_BAM ( + PICARD_MERGESAMFILES_LIBRARY.out.bam, + PREPARE_GENOME + .out + .fasta + .map { + [ [:], it ] + } + ) + ch_bam = MERGED_LIBRARY_NOFILTER_BAM.out.bam + ch_name_bam = MERGED_LIBRARY_NOFILTER_BAM.out.name_bam + ch_versions = ch_versions.mix(MERGED_LIBRARY_NOFILTER_BAM.out.versions.first()) + + // + // SUBWORKFLOW: Normalised bigWig coverage tracks + // + MERGED_LIBRARY_NF_BAM_TO_BIGWIG ( + ch_bam.join(MERGED_LIBRARY_NOFILTER_BAM.out.flagstat, by: [0]), + PREPARE_GENOME.out.chrom_sizes + ) + ch_versions = ch_versions.mix(MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.versions) + + + // Create channels: [ meta, [bam] ] or [ meta, [ bam, control_bam ] ] (now for Genrich) + if (params.with_control) { + ch_name_bam + .map { + meta, bam -> + meta.control ? null : [ meta.id, [ bam ] ] + } + .set { ch_control_bam } + + ch_name_bam + .map { + meta, bam -> + meta.control ? [ meta.control, meta, [ bam ] ] : null + } + .combine(ch_control_bam, by: 0) + .map { it -> [ it[1] , it[2] + it[4] ] } + .set { ch_name_bam } + } + + // Create channel: [ val(meta), bam, control_bam ] (now for Genrich) + if (params.with_control) { + ch_name_bam + .map { + meta, bams -> + [ meta , bams[0], bams[1] ] + } + .set { ch_merged_library_bams_sep } + } else { + ch_name_bam + .map { + meta, bam -> + [ meta , bam, [] ] + } + .set { ch_merged_library_bams_sep } + } + + // + // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC + // + MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH ( + ch_merged_library_bams_sep, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + params.genome, + PREPARE_GENOME.out.blacklist_bed.first(), + ".mLb.genrich.speaks.annotatePeaks.txt", + ch_multiqc_merged_library_sep_genrich_peak_count_header, + ch_multiqc_merged_library_sep_genrich_frip_score_header, + ch_multiqc_merged_library_sep_genrich_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc, + params.save_genrich_pvalues, + params.save_genrich_pileup, + params.save_genrich_bed, + params.save_genrich_duplicates, + params.homer_detail_annotation + ) + ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks + ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc + ch_library_genrich_sep_peak_count_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv + + ch_versions = ch_versions.mix(MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + // Create channels: [ meta, bam, bai, peak_file ] MERGED_LIBRARY_MARKDUPLICATES_PICARD .out .bam - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0], remainder: true) - .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.csi, by: [0], remainder: true) - .map { - meta, bam, bai, csi -> - if (bai) { - [ meta, bam, bai ] - } else { - [ meta, bam, csi ] - } - } - .join(MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks, by: [0]) + .join(MERGED_LIBRARY_MARKDUPLICATES_PICARD.out.bai, by: [0]) + .join(ch_library_peaks, by: [0]) .set { ch_bam_peaks } // @@ -635,7 +735,8 @@ workflow ATACSEQ { // // SUBWORKFLOW: Call peaks with MACS2, annotate with HOMER and perform downstream QC // - MERGED_REPLICATE_CALL_ANNOTATE_PEAKS ( + + MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2 ( ch_bam_replicate, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -648,18 +749,18 @@ workflow ATACSEQ { params.skip_peak_annotation, params.skip_peak_qc ) - ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks - ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.frip_multiqc - ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc - ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv - ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.versions) + ch_macs2_replicate_peaks = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks + ch_macs2_frip_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.frip_multiqc + ch_macs2_peak_count_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peak_count_multiqc + ch_macs2_plot_homer_annotatepeaks_replicate_multiqc = MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.plot_homer_annotatepeaks_tsv + ch_versions = ch_versions.mix(MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.versions) // // SUBWORKFLOW: Consensus peaks analysis // if (!params.skip_consensus_peaks) { MERGED_REPLICATE_CONSENSUS_PEAKS ( - MERGED_REPLICATE_CALL_ANNOTATE_PEAKS.out.peaks, + MERGED_REPLICATE_CALL_ANNOTATE_PEAKS_MACS2.out.peaks, ch_merged_library_replicate_bam, PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.gtf, @@ -677,6 +778,51 @@ workflow ATACSEQ { } } + // Check if we have multiple replicates (now for Genrich) + ch_merged_library_bams_sep + .map { + meta, bam, control_bam -> + def meta_clone = meta.clone() + meta_clone.id = meta_clone.id - ~/_REP\d+$/ + meta_clone.control = meta_clone.control ? meta_clone.control - ~/_REP\d+$/ : "" + [ meta_clone.id, meta_clone, bam, control_bam ] + } + .groupTuple() + .map { + id, metas, bams, control_bams -> + [ metas[0], bams.flatten(), control_bams.flatten() ] + } + .set { ch_merged_library_bams_joint } + + // + // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC + // + MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH ( + ch_merged_library_bams_joint, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + params.genome, + PREPARE_GENOME.out.blacklist_bed.first(), + ".mLb.genrich.jpeaks.annotatePeaks.txt", + ch_multiqc_merged_library_joint_genrich_peak_count_header, + ch_multiqc_merged_library_joint_genrich_frip_score_header, + ch_multiqc_merged_library_joint_genrich_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc, + params.save_genrich_pvalues, + params.save_genrich_pileup, + params.save_genrich_bed, + params.save_genrich_duplicates, + params.homer_detail_annotation + ) + ch_library_genrich_joint_peaks = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks + ch_library_genrich_joint_frip_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc + ch_library_genrich_joint_peak_count_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc + ch_library_genrich_joint_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv + + ch_versions = ch_versions.mix(MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + // // MODULE: Create IGV session // @@ -685,11 +831,16 @@ workflow ATACSEQ { PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.fai, MERGED_LIBRARY_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peaks.collect{it[1]}.ifEmpty([]), + ch_library_peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_library_bed.collect{it[1]}.ifEmpty([]), ch_ucsc_bedgraphtobigwig_replicate_bigwig.collect{it[1]}.ifEmpty([]), ch_macs2_replicate_peaks.collect{it[1]}.ifEmpty([]), ch_macs2_consensus_replicate_bed.collect{it[1]}.ifEmpty([]), + + MERGED_LIBRARY_NF_BAM_TO_BIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_peaks.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_peaks.collect{it[1]}.ifEmpty([]), + "${params.aligner}/merged_library/bigwig", { ["${params.aligner}/merged_library/macs2", params.narrow_peak? '/narrow_peak' : '/broad_peak' @@ -706,6 +857,15 @@ workflow ATACSEQ { params.narrow_peak? '/narrow_peak' : '/broad_peak', "/consensus" ].join('') }, + + "${params.aligner}/merged_library/genrich/bigwig", + { ["${params.aligner}/merged_library/genrich/sep", + params.narrow_peak? '/narrow_peak' : '/broad_peak' + ].join('') }, + { ["${params.aligner}/merged_library/genrich/joint", + params.narrow_peak? '/narrow_peak' : '/broad_peak' + ].join('') }, + ) ch_versions = ch_versions.mix(IGV.out.versions) } @@ -756,11 +916,15 @@ workflow ATACSEQ { ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]), ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.frip_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.peak_count_multiqc.collect{it[1]}.ifEmpty([]), - MERGED_LIBRARY_CALL_ANNOTATE_PEAKS.out.plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), + ch_library_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), ch_featurecounts_library_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]), + ch_markduplicates_replicate_stats.collect{it[1]}.ifEmpty([]), ch_markduplicates_replicate_flagstat.collect{it[1]}.ifEmpty([]), ch_markduplicates_replicate_idxstats.collect{it[1]}.ifEmpty([]), @@ -774,7 +938,11 @@ workflow ATACSEQ { ch_deseq2_pca_library_multiqc.collect().ifEmpty([]), ch_deseq2_clustering_library_multiqc.collect().ifEmpty([]), ch_deseq2_pca_replicate_multiqc.collect().ifEmpty([]), - ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([]) + ch_deseq2_clustering_replicate_multiqc.collect().ifEmpty([]), + + ch_library_genrich_joint_frip_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_peak_count_multiqc.collect{it[1]}.ifEmpty([]), + ch_library_genrich_joint_plot_homer_annotatepeaks_tsv.collect().ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() } From 7ba93fe3939261b769f725ca99e297210c84f43a Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 14:37:49 +0200 Subject: [PATCH 02/18] fixing genrich_sep output dirs --- conf/modules.config | 75 ++++++++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index bd6d4a01..07957604 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -721,7 +721,6 @@ if (!params.skip_peak_annotation) { } process { - withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_INDEX' { ext.prefix = { "${meta.id}.mLb.sorted" } publishDir = [ @@ -799,10 +798,9 @@ process { process { withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { ext.prefix = { "${meta.id}.mLb" } - ext.args = [ - '-j', - params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', - params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' ].join(' ').trim() publishDir = [ @@ -829,10 +827,22 @@ process { ] } - if (!params.skip_peak_annotation) { +if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (params.homer_detail_annotation) { process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { - ext.args = '-gid' + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { ext.prefix = { "${meta.id}.mLb.genrich.speaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, @@ -841,40 +851,27 @@ process { ] } } + } - if (params.homer_detail_annotation) { - process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.speaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_speak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } - } - if (!params.skip_peak_qc) { - process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { - ext.args = '-o ./ -p genrich_speak.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { - ext.args = '-o ./' - ext.prefix = 'genrich_annotate_sPeaks.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_sPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } } From 3932ea2af1e16b103b3122dc1fcae3907a4a852a Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 14:41:57 +0200 Subject: [PATCH 03/18] Adding if closure in modules.config --- conf/modules.config | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 07957604..1cf65835 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -826,6 +826,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } +} if (!params.skip_peak_annotation) { process { @@ -1101,8 +1102,6 @@ if (!params.skip_merge_replicates) { } } - - process { withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { ext.prefix = { "${meta.id}.mLb" } From 238f78177c9cd4b6b8584db2153b186311a30e2a Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 14:54:32 +0200 Subject: [PATCH 04/18] fixing se bam name sorting for genrich --- subworkflows/local/bam_nofilter_bamtools.nf | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf index 9c279195..b3b8be21 100644 --- a/subworkflows/local/bam_nofilter_bamtools.nf +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -40,24 +40,26 @@ workflow BAM_NOFILTER_BAMTOOLS { ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) // - // Name sort PE BAM before filtering with pysam + // Name sort BAM // SAMTOOLS_SORT ( - ch_bam_b.paired_end + ch_bam_b ) + ch_bam_b_pe = SAMTOOLS_SORT.out.bam.paired_end + ch_bam_b_se = SAMTOOLS_SORT.out.bam.single_end ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) // // Sort, index PE BAM file and run samtools stats, flagstat and idxstats // BAM_SORT_STATS_SAMTOOLS ( - SAMTOOLS_SORT.out.bam, + ch_bam_b_pe, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) emit: - name_bam = SAMTOOLS_SORT.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] + name_bam = ch_bam_b_pe.mix(ch_bam_b_se.single_end) // channel: [ val(meta), [ bam ] ] bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] From 72072090c404ec1da5e613f6337a6e72820fabec Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 14:58:34 +0200 Subject: [PATCH 05/18] fixing se bam name sorting for genrich --- subworkflows/local/bam_nofilter_bamtools.nf | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf index b3b8be21..e281ff62 100644 --- a/subworkflows/local/bam_nofilter_bamtools.nf +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -45,21 +45,19 @@ workflow BAM_NOFILTER_BAMTOOLS { SAMTOOLS_SORT ( ch_bam_b ) - ch_bam_b_pe = SAMTOOLS_SORT.out.bam.paired_end - ch_bam_b_se = SAMTOOLS_SORT.out.bam.single_end ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) // // Sort, index PE BAM file and run samtools stats, flagstat and idxstats // BAM_SORT_STATS_SAMTOOLS ( - ch_bam_b_pe, + SAMTOOLS_SORT.out.bam.paired_end, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) emit: - name_bam = ch_bam_b_pe.mix(ch_bam_b_se.single_end) // channel: [ val(meta), [ bam ] ] + name_bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] From 1f9e99e19f05fc613c8ad72be45161ada241c7c4 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 15:06:14 +0200 Subject: [PATCH 06/18] fixing se bam name sorting for genrich --- conf/modules.config | 13 +++++++++++- subworkflows/local/bam_nofilter_bamtools.nf | 23 ++++++++++++++------- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1cf65835..73b9c710 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -739,7 +739,7 @@ process { ] } - withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT' { + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_SE' { ext.args = '-n' ext.prefix = { "${meta.id}.mLb.namesorted" } publishDir = [ @@ -759,6 +759,17 @@ process { ] } + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:SAMTOOLS_SORT_PE' { + ext.args = '-n' + ext.prefix = { "${meta.id}.mLb.namesorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/samtools" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + } + withName: '.*:MERGED_LIBRARY_NOFILTER_BAM:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.mLb.sorted.bam" } publishDir = [ diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf index e281ff62..e2697f31 100644 --- a/subworkflows/local/bam_nofilter_bamtools.nf +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -1,4 +1,5 @@ -include { SAMTOOLS_SORT } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_SE } from '../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_PE } from '../../modules/nf-core/samtools/index/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' @@ -30,6 +31,14 @@ workflow BAM_NOFILTER_BAMTOOLS { } ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + // + // Name sort SE BAM + // + SAMTOOLS_SORT_SE ( + ch_bam_b.single_end + ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_SE.out.versions.first()) + // // Run samtools stats, flagstat and idxstats on SE BAM // @@ -40,24 +49,24 @@ workflow BAM_NOFILTER_BAMTOOLS { ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions.first()) // - // Name sort BAM + // Name sort PE BAM // - SAMTOOLS_SORT ( - ch_bam_b + SAMTOOLS_SORT_PE ( + ch_bam_b.paired_end ) - ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + ch_versions = ch_versions.mix(SAMTOOLS_SORT_PE.out.versions.first()) // // Sort, index PE BAM file and run samtools stats, flagstat and idxstats // BAM_SORT_STATS_SAMTOOLS ( - SAMTOOLS_SORT.out.bam.paired_end, + SAMTOOLS_SORT_PE.out.bam, ch_fasta ) ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions.first()) emit: - name_bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + name_bam = SAMTOOLS_SORT_PE.out.bam.mix(SAMTOOLS_SORT_SE.out.bam) // channel: [ val(meta), [ bam ] ] bam = BAM_SORT_STATS_SAMTOOLS.out.bam.mix(ch_bam_b.single_end) // channel: [ val(meta), [ bam ] ] stats = BAM_SORT_STATS_SAMTOOLS.out.stats.mix(BAM_STATS_SAMTOOLS.out.stats) // channel: [ val(meta), [ stats ] ] flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat.mix(BAM_STATS_SAMTOOLS.out.flagstat) // channel: [ val(meta), [ flagstat ] ] From 02f89ee143a91d0fc01062a2de907fbf0eaecdc9 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 15:09:25 +0200 Subject: [PATCH 07/18] fixing se bam name sorting for genrich --- subworkflows/local/bam_nofilter_bamtools.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/bam_nofilter_bamtools.nf b/subworkflows/local/bam_nofilter_bamtools.nf index e2697f31..31bf8521 100644 --- a/subworkflows/local/bam_nofilter_bamtools.nf +++ b/subworkflows/local/bam_nofilter_bamtools.nf @@ -1,5 +1,5 @@ include { SAMTOOLS_SORT as SAMTOOLS_SORT_SE } from '../../modules/nf-core/samtools/sort/main' -include { SAMTOOLS_SORT as SAMTOOLS_SORT_PE } from '../../modules/nf-core/samtools/index/main' +include { SAMTOOLS_SORT as SAMTOOLS_SORT_PE } from '../../modules/nf-core/samtools/sort/main' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' From 002351d359d37250297f0a1cb3f325a0c5e4d32e Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 15:20:39 +0200 Subject: [PATCH 08/18] added distance to TSS 1kb subset to plot --- bin/plot_homer_annotatepeaks.r | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index fc2096eb..b0e9a15a 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -90,6 +90,7 @@ for (idx in 1:length(HomerFiles)) { dist.freq[which(unique.gene.dat$Distance.to.TSS < 10000)] <- "< 10kb" dist.freq[which(unique.gene.dat$Distance.to.TSS < 5000)] <- "< 5kb" dist.freq[which(unique.gene.dat$Distance.to.TSS < 2000)] <- "< 2kb" + dist.freq[which(unique.gene.dat$Distance.to.TSS < 1000)] <- "< 1kb" dist.freq <- as.data.frame(table(dist.freq)) colnames(dist.freq) <- c("distance",sampleid) dist.melt <- melt(dist.freq) From 6997f7dc39c1532a1dc60b414d7a965c5f185a64 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 15 Aug 2023 16:08:09 +0200 Subject: [PATCH 09/18] adding default skip_genrich_sep = true --- conf/modules.config | 114 ++++++++++++++++++++++--------------------- nextflow.config | 1 + workflows/atacseq.nf | 55 +++++++++++---------- 3 files changed, 89 insertions(+), 81 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 73b9c710..aa9aecc0 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1113,55 +1113,44 @@ if (!params.skip_merge_replicates) { } } -process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { - ext.prefix = { "${meta.id}.mLb" } - ext.args = [ '-j', - params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', - params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', - params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { - ext.args = '-bed -c -f 0.20' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - enabled: false - ] - } +if (!params.skip_genrich_sep) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } -if (!params.skip_peak_annotation) { - process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { - ext.args = '-gid' + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } - if (params.homer_detail_annotation) { + if (!params.skip_peak_annotation) { process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } publishDir = [ path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, @@ -1170,27 +1159,40 @@ if (!params.skip_peak_annotation) { ] } } - } - if (!params.skip_peak_qc) { - process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { - ext.args = '-o ./ -p genrich_jpeak.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + if (params.homer_detail_annotation) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } + } - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { - ext.args = '-o ./' - ext.prefix = 'genrich_annotate_jPeaks.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_jpeak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_jPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } } } diff --git a/nextflow.config b/nextflow.config index 007036aa..3d042cc3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -60,6 +60,7 @@ params { skip_consensus_peaks = false // Options: Genrich + skip_genrich_sep = true genrich_qvalue = 0.01 genrich_pvalue = 0.01 save_genrich_pileup = true diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index 3266ee2c..d517eecb 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -570,31 +570,36 @@ workflow ATACSEQ { // // SUBWORKFLOW: Call peaks with Genrich, annotate with HOMER and perform downstream QC // - MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH ( - ch_merged_library_bams_sep, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gtf, - params.genome, - PREPARE_GENOME.out.blacklist_bed.first(), - ".mLb.genrich.speaks.annotatePeaks.txt", - ch_multiqc_merged_library_sep_genrich_peak_count_header, - ch_multiqc_merged_library_sep_genrich_frip_score_header, - ch_multiqc_merged_library_sep_genrich_peak_annotation_header, - params.narrow_peak, - params.skip_peak_annotation, - params.skip_peak_qc, - params.save_genrich_pvalues, - params.save_genrich_pileup, - params.save_genrich_bed, - params.save_genrich_duplicates, - params.homer_detail_annotation - ) - ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks - ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc - ch_library_genrich_sep_peak_count_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc - ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv - - ch_versions = ch_versions.mix(MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + ch_library_genrich_sep_peaks = Channel.empty() + ch_library_genrich_sep_frip_multiqc = Channel.empty() + ch_library_genrich_sep_peak_count_multiqc = Channel.empty() + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = Channel.empty() + if (!params.skip_genrich_sep) { + MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH ( + ch_merged_library_bams_sep, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.gtf, + params.genome, + PREPARE_GENOME.out.blacklist_bed.first(), + ".mLb.genrich.speaks.annotatePeaks.txt", + ch_multiqc_merged_library_sep_genrich_peak_count_header, + ch_multiqc_merged_library_sep_genrich_frip_score_header, + ch_multiqc_merged_library_sep_genrich_peak_annotation_header, + params.narrow_peak, + params.skip_peak_annotation, + params.skip_peak_qc, + params.save_genrich_pvalues, + params.save_genrich_pileup, + params.save_genrich_bed, + params.save_genrich_duplicates, + params.homer_detail_annotation + ) + ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks + ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc + ch_library_genrich_sep_peak_count_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peak_count_multiqc + ch_library_genrich_sep_plot_homer_annotatepeaks_tsv = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.plot_homer_annotatepeaks_tsv + ch_versions = ch_versions.mix(MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.versions) + } // Create channels: [ meta, bam, bai, peak_file ] MERGED_LIBRARY_MARKDUPLICATES_PICARD From 47c6bd1dc49e07521be9958abc27fac8beffd642 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Fri, 18 Aug 2023 15:07:55 +0200 Subject: [PATCH 10/18] edited analyze_multimappers exit message --- workflows/atacseq.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index d517eecb..ee8bb98f 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -179,8 +179,8 @@ workflow ATACSEQ { // Check if analyze_multimappers is set // if (params.analyze_multimappers) { - if (params.aligner == 'bwa' || params.aligner == 'chromap') { - exit 1, 'Multimapper analysis is only supported for Bowtie2 or STAR so far.' + if (params.aligner == 'bwa' || params.aligner == 'chromap' || params.aligner == 'star') { + exit 1, 'Multimapping read analysis is so far only supported for Bowtie2. Remove the --analyze_multimappers parameter or change --aligner to bowtie2.' } } From fc857337983109c8e5d5fa545a45f5f93bef1781 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Fri, 18 Aug 2023 15:46:39 +0200 Subject: [PATCH 11/18] simplified QC plotting for macs2 and genrich --- bin/plot_genrich_qc.r | 155 ------------------ bin/{plot_macs2_qc.r => plot_peaks_qc.r} | 2 +- modules/local/plot_genrich_qc.nf | 35 ---- .../{plot_macs2_qc.nf => plot_peaks_qc.nf} | 4 +- ...am_peaks_call_qc_annotate_genrich_homer.nf | 12 +- .../bam_peaks_call_qc_annotate_macs2_homer.nf | 10 +- 6 files changed, 14 insertions(+), 204 deletions(-) delete mode 100755 bin/plot_genrich_qc.r rename bin/{plot_macs2_qc.r => plot_peaks_qc.r} (98%) delete mode 100644 modules/local/plot_genrich_qc.nf rename modules/local/{plot_macs2_qc.nf => plot_peaks_qc.nf} (96%) diff --git a/bin/plot_genrich_qc.r b/bin/plot_genrich_qc.r deleted file mode 100755 index c8bd38b9..00000000 --- a/bin/plot_genrich_qc.r +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env Rscript - -################################################ -################################################ -## LOAD LIBRARIES ## -################################################ -################################################ - -library(optparse) -library(ggplot2) -library(reshape2) -library(scales) - -################################################ -################################################ -## PARSE COMMAND-LINE PARAMETERS ## -################################################ -################################################ - -option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), - make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"), - make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='genrich_peakqc', help="Output prefix", metavar="string")) - -opt_parser <- OptionParser(option_list=option_list) -opt <- parse_args(opt_parser) - -if (is.null(opt$peak_files)){ - print_help(opt_parser) - stop("At least one peak file must be supplied", call.=FALSE) -} -if (is.null(opt$sample_ids)){ - print_help(opt_parser) - stop("Please provide sample ids associated with peak files.", call.=FALSE) -} - -if (file.exists(opt$outdir) == FALSE) { - dir.create(opt$outdir,recursive=TRUE) -} - -PeakFiles <- unlist(strsplit(opt$peak_files,",")) -SampleIDs <- unlist(strsplit(opt$sample_ids,",")) -if (length(PeakFiles) != length(SampleIDs)) { - print_help(opt_parser) - stop("Number of sample ids must equal number of homer annotated files.", call.=FALSE) -} - -################################################ -################################################ -## READ IN DATA ## -################################################ -################################################ - -plot.dat <- data.frame() -summary.dat <- data.frame() -for (idx in 1:length(PeakFiles)) { - - sampleid = SampleIDs[idx] - isNarrow <- FALSE - header <- c("chrom","start","end","name","pileup", "strand", "fold", "-log10(pvalue)","-log10(qvalue)") - fsplit <- unlist(strsplit(basename(PeakFiles[idx]), split='.',fixed=TRUE)) - if (fsplit[length(fsplit)] == 'narrowPeak') { - isNarrow <- TRUE - header <- c(header,"summit") - } - peaks <- read.table(PeakFiles[idx], sep="\t", header=FALSE) - colnames(peaks) <- header - - ## GET SUMMARY STATISTICS - peaks.dat <- peaks[,c('fold','-log10(qvalue)','-log10(pvalue)')] - peaks.dat$length <- (peaks$end - peaks$start) - for (cname in colnames(peaks.dat)) { - sdat <- summary(peaks.dat[,cname]) - sdat["num_peaks"] <- nrow(peaks.dat) - sdat["measure"] <- cname - sdat["sample"] <- sampleid - sdat <- t(data.frame(x=matrix(sdat),row.names=names(sdat))) - summary.dat <- rbind(summary.dat,sdat) - } - colnames(peaks.dat) <- c('fold','fdr','pvalue','length') - peaks.dat$name <- rep(sampleid,nrow(peaks.dat)) - plot.dat <- rbind(plot.dat,peaks.dat) -} -plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) - -SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep="")) -write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE) - -################################################ -################################################ -## PLOTS ## -################################################ -################################################ - -## RETURNS VIOLIN PLOT OBJECT -violin.plot <- function(plot.dat,x,y,ylab,title,log) { - - plot <- ggplot(plot.dat, aes_string(x=x, y=y)) + - geom_violin(aes_string(colour=x,fill=x), alpha = 0.3) + - geom_boxplot(width=0.1) + - xlab("") + - ylab(ylab) + - ggtitle(title) + - theme(legend.position="none", - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) - if (log == 10) { - plot <- plot + scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) - } - if (log == 2) { - plot <- plot + scale_y_continuous(trans='log2',breaks = trans_breaks("log2", function(x) 2^x), labels = trans_format("log2", math_format(2^.x))) - } - return(plot) -} - -############################ - -PlotFile <- file.path(opt$outdir,paste(opt$outprefix,".plots.pdf",sep="")) -pdf(PlotFile,height=6,width=3*length(unique(plot.dat$name))) - -## PEAK COUNT PLOT -peak.count.dat <- as.data.frame(table(plot.dat$name)) -colnames(peak.count.dat) <- c("name","count") -plot <- ggplot(peak.count.dat, aes(x=name, y=count)) + - geom_bar(stat="identity",aes(colour=name,fill=name), position = "dodge", width = 0.8, alpha = 0.3) + - xlab("") + - ylab("Number of peaks") + - ggtitle("Peak count") + - theme(legend.position="none", - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + - geom_text(aes(label = count, x = name, y = count), position = position_dodge(width = 0.8), vjust = -0.6) -print(plot) - -## VIOLIN PLOTS -print(violin.plot(plot.dat=plot.dat,x="name",y="length",ylab=expression(log[10]*" peak length"),title="Peak length distribution",log=10)) -print(violin.plot(plot.dat=plot.dat,x="name",y="fold",ylab=expression(log[2]*" fold-enrichment"),title="Fold-change distribution",log=2)) -print(violin.plot(plot.dat=plot.dat,x="name",y="fdr",ylab=expression(-log[10]*" qvalue"),title="FDR distribution",log=-1)) -print(violin.plot(plot.dat=plot.dat,x="name",y="pvalue",ylab=expression(-log[10]*" pvalue"),title="Pvalue distribution",log=-1)) -dev.off() - -################################################ -################################################ -################################################ -################################################ diff --git a/bin/plot_macs2_qc.r b/bin/plot_peaks_qc.r similarity index 98% rename from bin/plot_macs2_qc.r rename to bin/plot_peaks_qc.r index 5cf074de..0275a1f3 100755 --- a/bin/plot_macs2_qc.r +++ b/bin/plot_peaks_qc.r @@ -20,7 +20,7 @@ library(scales) option_list <- list(make_option(c("-i", "--peak_files"), type="character", default=NULL, help="Comma-separated list of peak files.", metavar="path"), make_option(c("-s", "--sample_ids"), type="character", default=NULL, help="Comma-separated list of sample ids associated with peak files. Must be unique and in same order as peaks files input.", metavar="string"), make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='macs2_peakqc', help="Output prefix", metavar="string")) + make_option(c("-p", "--outprefix"), type="character", default='_peakqc', help="Output prefix", metavar="string")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) diff --git a/modules/local/plot_genrich_qc.nf b/modules/local/plot_genrich_qc.nf deleted file mode 100644 index a0d5ae98..00000000 --- a/modules/local/plot_genrich_qc.nf +++ /dev/null @@ -1,35 +0,0 @@ -process PLOT_GENRICH_QC { - label 'process_medium' - - conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': - 'biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" - - input: - path peaks - val is_narrow_peak - - output: - path '*.txt' , emit: txt - path '*.pdf' , emit: pdf - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in nf-core/atacseq/bin/ - def args = task.ext.args ?: '' - def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' - """ - plot_genrich_qc.r \\ - -i ${peaks.join(',')} \\ - -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_peaks_qc.nf similarity index 96% rename from modules/local/plot_macs2_qc.nf rename to modules/local/plot_peaks_qc.nf index a2c39b02..d0e141d2 100644 --- a/modules/local/plot_macs2_qc.nf +++ b/modules/local/plot_peaks_qc.nf @@ -1,4 +1,4 @@ -process PLOT_MACS2_QC { +process PLOT_PEAKS_QC { label 'process_medium' conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" @@ -22,7 +22,7 @@ process PLOT_MACS2_QC { def args = task.ext.args ?: '' def peak_type = is_narrow_peak ? 'narrowPeak' : 'broadPeak' """ - plot_macs2_qc.r \\ + plot_peaks_qc.r \\ -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ $args diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf index 90e8c105..4502e1b8 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf @@ -9,7 +9,7 @@ include { HOMER_DETAIL_ANNOTATEPEAKS } from '../../modules/local/homer_detail include { FRIP_SCORE } from '../../modules/local/frip_score' include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' -include { PLOT_GENRICH_QC } from '../../modules/local/plot_genrich_qc' +include { PLOT_PEAKS_QC as PLOT_GENRICH_QC } from '../../modules/local/plot_peaks_qc' include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' @@ -28,7 +28,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { is_narrow_peak // boolean: true/false skip_peak_annotation // boolean: true/false skip_peak_qc // boolean: true/false - + save_pvalues // boolean: true/false save_pileup // boolean: true/false save_bed // boolean: true/false @@ -50,7 +50,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { save_pileup, save_bed, save_duplicates - + ) ch_versions = ch_versions.mix(GENRICH.out.versions.first()) @@ -60,7 +60,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { GENRICH .out .peak - .filter { + .filter { meta, peaks -> peaks.size() > 0 } @@ -146,7 +146,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { ) ch_homer_det_annotatepeaks = HOMER_DETAIL_ANNOTATEPEAKS.out.txt ch_versions = ch_versions.mix(HOMER_DETAIL_ANNOTATEPEAKS.out.versions.first()) - } + } } if (!skip_peak_qc) { @@ -184,7 +184,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { duplicates = GENRICH.out.duplicates // channel: [ val(meta), [ bedgraph ] ] frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] - + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf index 4c2a8710..42d3195e 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf @@ -7,7 +7,7 @@ include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatep include { FRIP_SCORE } from '../../modules/local/frip_score' include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' -include { PLOT_MACS2_QC } from '../../modules/local/plot_macs2_qc' +include { PLOT_PEAKS_QC as PLOT_MACS2_QC } from '../../modules/local/plot_peaks_qc' include { PLOT_HOMER_ANNOTATEPEAKS } from '../../modules/local/plot_homer_annotatepeaks' workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { @@ -23,7 +23,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { is_narrow_peak // boolean: true/false skip_peak_annotation // boolean: true/false skip_peak_qc // boolean: true/false - + main: ch_versions = Channel.empty() @@ -43,7 +43,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { MACS2_CALLPEAK .out .peak - .filter { + .filter { meta, peaks -> peaks.size() > 0 } @@ -102,7 +102,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { ) ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) - + if (!skip_peak_qc) { // // MACS2 QC plots with R @@ -138,7 +138,7 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER { bedgraph = MACS2_CALLPEAK.out.bdg // channel: [ val(meta), [ bedgraph ] ] frip_txt = FRIP_SCORE.out.txt // channel: [ val(meta), [ txt ] ] - + frip_multiqc = MULTIQC_CUSTOM_PEAKS.out.frip // channel: [ val(meta), [ frip ] ] peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] From 199c8c5411566c9e516e7ce6a43d1f04042d4eb6 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Fri, 18 Aug 2023 15:55:18 +0200 Subject: [PATCH 12/18] added skip_genrich_sep to schema --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 448713b8..31aa8d81 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -390,6 +390,11 @@ "description": "Skip consensus peak generation, annotation and counting.", "fa_icon": "fas fa-fast-forward" }, + "skip_genrich_sep": { + "type": "boolean", + "description": "Skip Genrich individual (separate replicates) peak calling. If skipped, each group of biological replicates will be analysed together, which is the recommended approach.", + "fa_icon": "fas fa-fast-forward" + }, "save_genrich_pileup": { "type": "boolean", "description": "Instruct Genrich to create bedGraph files normalised to signal per million reads.", From a18aa15c95db31e0edc1d3ff68f4047a3ee0cbc8 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 22 Aug 2023 15:03:07 +0200 Subject: [PATCH 13/18] Removing unused peak annotation code --- conf/base.config | 8 +++--- conf/modules.config | 26 ------------------- modules/nf-core/bowtie2/align/main.nf | 2 +- nextflow.config | 1 - nextflow_schema.json | 7 +---- ...am_peaks_call_qc_annotate_genrich_homer.nf | 18 ------------- workflows/atacseq.nf | 8 +++--- 7 files changed, 9 insertions(+), 61 deletions(-) diff --git a/conf/base.config b/conf/base.config index b27712d8..a6ad1a91 100755 --- a/conf/base.config +++ b/conf/base.config @@ -35,12 +35,12 @@ process { time = { check_max( 4.h * task.attempt, 'time' ) } } withLabel:process_medium { - cpus = { check_max( 4 * task.attempt, 'cpus' ) } + cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { - cpus = { check_max( 8 * task.attempt, 'cpus' ) } + cpus = { check_max( 12 * task.attempt, 'cpus' ) } memory = { check_max( 72.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } @@ -49,8 +49,8 @@ process { } withLabel:process_ultra { cpus = { check_max( 8 * task.attempt, 'cpus' ) } - memory = { check_max( 200.GB * task.attempt, 'memory' ) } - time = { check_max( 72.h * task.attempt, 'time' ) } + memory = { check_max( 150.GB * task.attempt, 'memory' ) } + time = { check_max( 120.h * task.attempt, 'time' ) } } withLabel:process_high_memory { memory = { check_max( 200.GB * task.attempt, 'memory' ) } diff --git a/conf/modules.config b/conf/modules.config index aa9aecc0..6803af59 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -852,19 +852,6 @@ if (!params.skip_peak_annotation) { } } - if (params.homer_detail_annotation) { - process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.speaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - if (!params.skip_peak_qc) { process { withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { @@ -1160,19 +1147,6 @@ if (!params.skip_genrich_sep) { } } - if (params.homer_detail_annotation) { - process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_DETAIL_ANNOTATEPEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - } - } - if (!params.skip_peak_qc) { process { withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf index 74fcc7c7..a77114d2 100644 --- a/modules/nf-core/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -1,6 +1,6 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label "process_ultra" + label "process_high" conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/nextflow.config b/nextflow.config index 3d042cc3..62a5c420 100644 --- a/nextflow.config +++ b/nextflow.config @@ -67,7 +67,6 @@ params { save_genrich_pvalues = true save_genrich_bed = true save_genrich_duplicates = true - homer_detail_annotation = false // Options: DESeq2 QC deseq2_vst = true diff --git a/nextflow_schema.json b/nextflow_schema.json index 31aa8d81..9402a7d4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -330,7 +330,7 @@ "type": "integer", "default": 0, "description": "Output this number of multimapping reads from the alignment step and include them in peak calling.", - "help_text": "This will include non-unique reads in the alignment (Bowtie2 and STAR) and peak calling steps (Genrich). Default of 0 means only the best alignment is use and multimappers are ignored.", + "help_text": "This will include non-unique reads in the alignment (Bowtie2) and peak calling steps (Genrich). Default of 0 means only the best alignment is used and multimappers are ignored.", "fa_icon": "fas fa-chart-bar" } } @@ -426,11 +426,6 @@ "default": 0.1, "description": "Minimum p-value cutoff for peak detection.", "fa_icon": "fas fa-sort-amount-down" - }, - "homer_detail_annotation": { - "type": "boolean", - "description": "Annotate peaks with HOMER in detailed mode.", - "fa_icon": "fas fa-search-plus" } } }, diff --git a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf index 4502e1b8..e162f628 100644 --- a/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf +++ b/subworkflows/local/bam_peaks_call_qc_annotate_genrich_homer.nf @@ -4,7 +4,6 @@ include { GENRICH } from '../../modules/nf-core/genrich/main' include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main' -include { HOMER_DETAIL_ANNOTATEPEAKS } from '../../modules/local/homer_detailed_ann' include { FRIP_SCORE } from '../../modules/local/frip_score' include { MULTIQC_CUSTOM_PEAKS } from '../../modules/local/multiqc_custom_peaks' @@ -33,8 +32,6 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { save_pileup // boolean: true/false save_bed // boolean: true/false save_duplicates // boolean: true/false - detailed_annotation // boolean: true/false - main: @@ -135,20 +132,6 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { ch_homer_annotatepeaks = HOMER_ANNOTATEPEAKS.out.txt ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions.first()) - if (detailed_annotation) { - // - // Annotate peaks with HOMER (detailed) - // - if (genome) { - HOMER_DETAIL_ANNOTATEPEAKS ( - ch_genrich_peaks, - genome - ) - ch_homer_det_annotatepeaks = HOMER_DETAIL_ANNOTATEPEAKS.out.txt - ch_versions = ch_versions.mix(HOMER_DETAIL_ANNOTATEPEAKS.out.versions.first()) - } - } - if (!skip_peak_qc) { // // Genrich QC plots with R @@ -189,7 +172,6 @@ workflow BAM_PEAKS_CALL_QC_ANNOTATE_GENRICH_HOMER { peak_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count // channel: [ val(meta), [ counts ] ] homer_annotatepeaks = ch_homer_annotatepeaks // channel: [ val(meta), [ txt ] ] - homer_det_annotatepeaks = ch_homer_det_annotatepeaks // channel: [ val(meta), [ txt ] ] plot_genrich_qc_txt = ch_plot_genrich_qc_txt // channel: [ txt ] plot_genrich_qc_pdf = ch_plot_genrich_qc_pdf // channel: [ pdf ] diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index ee8bb98f..c0868e82 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -178,7 +178,7 @@ workflow ATACSEQ { // // Check if analyze_multimappers is set // - if (params.analyze_multimappers) { + if (params.analyze_multimappers != 0) { if (params.aligner == 'bwa' || params.aligner == 'chromap' || params.aligner == 'star') { exit 1, 'Multimapping read analysis is so far only supported for Bowtie2. Remove the --analyze_multimappers parameter or change --aligner to bowtie2.' } @@ -591,8 +591,7 @@ workflow ATACSEQ { params.save_genrich_pvalues, params.save_genrich_pileup, params.save_genrich_bed, - params.save_genrich_duplicates, - params.homer_detail_annotation + params.save_genrich_duplicates ) ch_library_genrich_sep_peaks = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks ch_library_genrich_sep_frip_multiqc = MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc @@ -818,8 +817,7 @@ workflow ATACSEQ { params.save_genrich_pvalues, params.save_genrich_pileup, params.save_genrich_bed, - params.save_genrich_duplicates, - params.homer_detail_annotation + params.save_genrich_duplicates ) ch_library_genrich_joint_peaks = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.peaks ch_library_genrich_joint_frip_multiqc = MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH.out.frip_multiqc From c1830f979052fce16f0a2ee5f8d01b3f3f0b4c31 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 22 Aug 2023 15:04:13 +0200 Subject: [PATCH 14/18] Removing unused peak annotation code --- modules/local/homer_detailed_ann.nf | 45 ----------------------------- 1 file changed, 45 deletions(-) delete mode 100644 modules/local/homer_detailed_ann.nf diff --git a/modules/local/homer_detailed_ann.nf b/modules/local/homer_detailed_ann.nf deleted file mode 100644 index 5d9ae345..00000000 --- a/modules/local/homer_detailed_ann.nf +++ /dev/null @@ -1,45 +0,0 @@ -process HOMER_DETAIL_ANNOTATEPEAKS { - tag "$meta.id" - label 'process_medium' - - // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - // configureHomer.pl does not work with docker or singularity, so we use conda (see: https://github.com/bioconda/bioconda-recipes/blob/master/recipes/homer/README.txt) - conda "bioconda::homer=4.11" - - input: - tuple val(meta), path(peak) - val genome - - output: - tuple val(meta), path("*annotatePeaks.detailed.txt"), emit: txt - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ - installed=\$(perl \$CONDA_PREFIX/share/homer*/configureHomer.pl -list) - line=\$(echo "\$installed" | grep "$genome") - yes_or_no=\${line:0:1} - - if [ \$yes_or_no != "+" ]; then - perl \$CONDA_PREFIX/share/homer*/configureHomer.pl -install $genome - fi - - annotatePeaks.pl \\ - $peak \\ - $genome \\ - $args \\ - -cpu $task.cpus \\ - > ${prefix}.annotatePeaks.detailed.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - homer: $VERSION - END_VERSIONS - """ -} From 3f4fe7ed594f5ce1c0b3a2a9b32349291753f509 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 22 Aug 2023 15:21:00 +0200 Subject: [PATCH 15/18] fixed skip_genrich_sep config selector --- conf/modules.config | 98 +++++++++++++++++++++++---------------------- 1 file changed, 50 insertions(+), 48 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 6803af59..741479df 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -806,71 +806,73 @@ process { } } -process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { - ext.prefix = { "${meta.id}.mLb" } - ext.args = [ '-j', - params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', - params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', - params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { - ext.args = '-bed -c -f 0.20' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - enabled: false - ] - } +if (!params.skip_genrich_sep) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.speaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } -if (!params.skip_peak_annotation) { - process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { - ext.args = '-gid' + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { ext.prefix = { "${meta.id}.mLb.genrich.speaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } } - if (!params.skip_peak_qc) { + if (!params.skip_peak_annotation) { process { - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { - ext.args = '-o ./ -p genrich_speak.mLb' + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.speaks" } publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + } - withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { - ext.args = '-o ./' - ext.prefix = 'genrich_annotate_sPeaks.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] + if (!params.skip_peak_qc) { + process { + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_speak.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:MERGED_LIBRARY_SEP_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_sPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/sep/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } } } } From 64ca3b1a814773669d7894e1e783f06b26dbabe0 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 29 Aug 2023 09:51:11 +0200 Subject: [PATCH 16/18] Adding STAR analyze_multimappers support 1 --- conf/modules.config | 62 ++++++++++++++++++++++---------------------- workflows/atacseq.nf | 4 +-- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 741479df..cdc19be6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -318,7 +318,8 @@ if (params.aligner == 'star') { '--readFilesCommand zcat', '--runRNGseed 0', '--outSAMattributes NH HI AS NM MD', - params.save_unaligned ? '--outReadsUnmapped Fastx' : '' + params.save_unaligned ? '--outReadsUnmapped Fastx' : '', + params.analyze_multimappers != 0 ? "--outFilterMultimapNmax ${params.analyze_multimappers} --outAnchorMultimapNmax ${params.analyze_multimappers} * 2" : '' ].join(' ').trim() publishDir = [ [ @@ -1102,38 +1103,36 @@ if (!params.skip_merge_replicates) { } } -if (!params.skip_genrich_sep) { - process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { - ext.prefix = { "${meta.id}.mLb" } - ext.args = [ '-j', - params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', - params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', - params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' - ].join(' ').trim() - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } +process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:GENRICH' { + ext.prefix = { "${meta.id}.mLb" } + ext.args = [ '-j', + params.genrich_pvalue ? "-p ${params.genrich_pvalue}" : '', + params.genrich_qvalue ? "-q ${params.genrich_qvalue}" : '', + params.analyze_multimappers != 0 ? "-s ${params.analyze_multimappers}" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { - ext.args = '-bed -c -f 0.20' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - enabled: false - ] - } + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + enabled: false + ] + } - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { - ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:MULTIQC_CUSTOM_PEAKS' { + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } if (!params.skip_peak_annotation) { @@ -1174,6 +1173,7 @@ if (!params.skip_genrich_sep) { } } + if (!params.skip_igv) { process { withName: 'IGV' { diff --git a/workflows/atacseq.nf b/workflows/atacseq.nf index c0868e82..9796a5e0 100644 --- a/workflows/atacseq.nf +++ b/workflows/atacseq.nf @@ -179,8 +179,8 @@ workflow ATACSEQ { // Check if analyze_multimappers is set // if (params.analyze_multimappers != 0) { - if (params.aligner == 'bwa' || params.aligner == 'chromap' || params.aligner == 'star') { - exit 1, 'Multimapping read analysis is so far only supported for Bowtie2. Remove the --analyze_multimappers parameter or change --aligner to bowtie2.' + if (params.aligner == 'bwa' || params.aligner == 'chromap') { + exit 1, 'Multimapping read analysis is so far only supported for Bowtie2 and STAR. Remove the --analyze_multimappers parameter or change --aligner to bowtie2 or star.' } } From 3b014d63001b7c44f19fe77b7b5084c9519aff83 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Tue, 29 Aug 2023 22:52:37 +0200 Subject: [PATCH 17/18] fixed unrecognized parameter for STAR multimappers --- conf/modules.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index cdc19be6..f737435e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -319,7 +319,7 @@ if (params.aligner == 'star') { '--runRNGseed 0', '--outSAMattributes NH HI AS NM MD', params.save_unaligned ? '--outReadsUnmapped Fastx' : '', - params.analyze_multimappers != 0 ? "--outFilterMultimapNmax ${params.analyze_multimappers} --outAnchorMultimapNmax ${params.analyze_multimappers} * 2" : '' + params.analyze_multimappers != 0 ? "--outFilterMultimapNmax ${params.analyze_multimappers} --winAnchorMultimapNmax ${params.analyze_multimappers * 2}" : '' ].join(' ').trim() publishDir = [ [ From 8eb8271d495815b0131b4009665804f7975a5009 Mon Sep 17 00:00:00 2001 From: samuelruizperez Date: Fri, 1 Sep 2023 10:45:56 +0200 Subject: [PATCH 18/18] fixing peak annotation dirs --- conf/modules.config | 53 ++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f737435e..b38e0535 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1134,46 +1134,45 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } +} - if (!params.skip_peak_annotation) { +if (!params.skip_peak_annotation) { + process { + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_qc) { process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:HOMER_ANNOTATEPEAKS' { - ext.args = '-gid' - ext.prefix = { "${meta.id}.mLb.genrich.jpeaks" } + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { + ext.args = '-o ./ -p genrich_jpeak.mLb' publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}" }, + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - } - if (!params.skip_peak_qc) { - process { - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_GENRICH_QC' { - ext.args = '-o ./ -p genrich_jpeak.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { - ext.args = '-o ./' - ext.prefix = 'genrich_annotate_jPeaks.mLb' - publishDir = [ - path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } + withName: '.*:MERGED_LIBRARY_JOINT_CALL_ANNOTATE_PEAKS_GENRICH:PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'genrich_annotate_jPeaks.mLb' + publishDir = [ + path: { "${params.outdir}/${params.aligner}/merged_library/genrich/joint/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] } } } } - if (!params.skip_igv) { process { withName: 'IGV' {