Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer similar to nf-core/atacseq #401

Merged
merged 3 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ if (!params.skip_plot_fingerprint) {
}

process {
withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:MACS2_CALLPEAK' {
withName: 'MACS2_CALLPEAK' {
ext.args = [
'--keep-dup all',
params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}",
Expand All @@ -559,15 +559,15 @@ process {
]
}

withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:FRIP_SCORE' {
withName: 'FRIP_SCORE' {
ext.args = '-bed -c -f 0.20'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
enabled: false
]
}

withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:MULTIQC_CUSTOM_PEAKS' {
withName: 'MULTIQC_CUSTOM_PEAKS' {
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
mode: params.publish_dir_mode,
Expand All @@ -591,7 +591,7 @@ if (!params.skip_peak_annotation) {

if (!params.skip_peak_qc) {
process {
withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:PLOT_MACS2_QC' {
withName: 'PLOT_MACS2_QC' {
ext.args = '-o ./ -p macs2_peak'
publishDir = [
path: { "${params.outdir}/${params.aligner}/merged_library/macs2/${params.narrow_peak ? '/narrow_peak' : '/broad_peak'}/qc" },
Expand All @@ -600,7 +600,7 @@ if (!params.skip_peak_annotation) {
]
}

withName: '.*:BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER:PLOT_HOMER_ANNOTATEPEAKS' {
withName: 'PLOT_HOMER_ANNOTATEPEAKS' {
ext.args = '-o ./'
ext.prefix = 'macs2_annotatePeaks'
publishDir = [
Expand Down Expand Up @@ -638,7 +638,7 @@ if (!params.skip_consensus_peaks) {

if (!params.skip_peak_annotation) {
process {
withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' {
withName: '.*:BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2:HOMER_ANNOTATEPEAKS' {
ext.args = '-gid'
ext.prefix = { "${meta.id}.consensus_peaks" }
publishDir = [
Expand All @@ -661,7 +661,7 @@ if (!params.skip_consensus_peaks) {

if (!params.skip_deseq2_qc) {
process {
withName: DESEQ2_QC {
withName: 'DESEQ2_QC' {
ext.when = { meta.multiple_groups && meta.replicates_exist }
ext.args = [
'--id_col 1',
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//
// Call consensus peaks with BEDTools and custom scripts, annotate with HOMER, quantify with featureCounts and QC with DESeq2
//

include { HOMER_ANNOTATEPEAKS } from '../../modules/nf-core/homer/annotatepeaks/main'
include { SUBREAD_FEATURECOUNTS } from '../../modules/nf-core/subread/featurecounts/main'

include { MACS2_CONSENSUS } from '../../modules/local/macs2_consensus'
include { ANNOTATE_BOOLEAN_PEAKS } from '../../modules/local/annotate_boolean_peaks'
include { DESEQ2_QC } from '../../modules/local/deseq2_qc'

workflow BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 {
take:
ch_peaks // channel: [ val(meta), [ peaks ] ]
ch_bams // channel: [ val(meta), [ ip_bams ] ]
ch_fasta // channel: [ fasta ]
ch_gtf // channel: [ gtf ]
ch_deseq2_pca_header_multiqc // channel: [ header_file ]
ch_deseq2_clustering_header_multiqc // channel: [ header_file ]
is_narrow_peak // boolean: true/false
skip_peak_annotation // boolean: true/false
skip_deseq2_qc // boolean: true/false

main:

ch_versions = Channel.empty()

// Create channels: [ meta , [ peaks ] ]
// Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ]
ch_peaks
.map {
meta, peak ->
[ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ]
}
.groupTuple()
.map {
antibody, groups, peaks ->
[
antibody,
groups.groupBy().collectEntries { [(it.key) : it.value.size()] },
peaks
]
}
.map {
antibody, groups, peaks ->
def meta_new = [:]
meta_new.id = antibody
meta_new.multiple_groups = groups.size() > 1
meta_new.replicates_exist = groups.max { groups.value }.value > 1
[ meta_new, peaks ]
}
.set { ch_antibody_peaks }

//
// Generate consensus peaks across samples
//
MACS2_CONSENSUS (
ch_antibody_peaks,
is_narrow_peak
)
ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions)

//
// Annotate consensus peaks
//
if (!skip_peak_annotation) {
HOMER_ANNOTATEPEAKS (
MACS2_CONSENSUS.out.bed,
ch_fasta,
ch_gtf
)
ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS.out.versions)

//
// MODULE: Add boolean fields to annotated consensus peaks to aid filtering
//
ANNOTATE_BOOLEAN_PEAKS (
MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS.out.txt, by: [0]),
)
ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions)
}

// Create channels: [ meta, [ ip_bams ], saf ]
MACS2_CONSENSUS
.out
.saf
.map {
meta, saf ->
[ meta.id, meta, saf ]
}
.join(ch_bams)
.map {
antibody, meta, saf, bams ->
[ meta, bams.flatten().sort(), saf ]
}
.set { ch_bam_saf }

//
// Quantify peaks across samples with featureCounts
//
SUBREAD_FEATURECOUNTS (
ch_bam_saf
)
ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions)

//
// Generate QC plots with DESeq2
//
ch_deseq2_qc_pdf = Channel.empty()
ch_deseq2_qc_rdata = Channel.empty()
ch_deseq2_qc_rds = Channel.empty()
ch_deseq2_qc_pca_txt = Channel.empty()
ch_deseq2_qc_pca_multiqc = Channel.empty()
ch_deseq2_qc_dists_txt = Channel.empty()
ch_deseq2_qc_dists_multiqc = Channel.empty()
ch_deseq2_qc_log = Channel.empty()
ch_deseq2_qc_size_factors = Channel.empty()
if (!skip_deseq2_qc) {
DESEQ2_QC (
SUBREAD_FEATURECOUNTS.out.counts,
ch_deseq2_pca_header_multiqc,
ch_deseq2_clustering_header_multiqc
)
ch_deseq2_qc_pdf = DESEQ2_QC.out.pdf
ch_deseq2_qc_rdata = DESEQ2_QC.out.rdata
ch_deseq2_qc_rds = DESEQ2_QC.out.rds
ch_deseq2_qc_pca_txt = DESEQ2_QC.out.pca_txt
ch_deseq2_qc_pca_multiqc = DESEQ2_QC.out.pca_multiqc
ch_deseq2_qc_dists_txt = DESEQ2_QC.out.dists_txt
ch_deseq2_qc_dists_multiqc = DESEQ2_QC.out.dists_multiqc
ch_deseq2_qc_log = DESEQ2_QC.out.log
ch_deseq2_qc_size_factors = DESEQ2_QC.out.size_factors
ch_versions = ch_versions.mix(DESEQ2_QC.out.versions)
}

emit:
consensus_bed = MACS2_CONSENSUS.out.bed // channel: [ bed ]
consensus_saf = MACS2_CONSENSUS.out.saf // channel: [ saf ]
consensus_pdf = MACS2_CONSENSUS.out.pdf // channel: [ pdf ]
consensus_txt = MACS2_CONSENSUS.out.txt // channel: [ pdf ]
consensus_boolean_txt = MACS2_CONSENSUS.out.boolean_txt // channel: [ txt ]
consensus_intersect_txt = MACS2_CONSENSUS.out.intersect_txt // channel: [ txt ]

featurecounts_txt = SUBREAD_FEATURECOUNTS.out.counts // channel: [ txt ]
featurecounts_summary = SUBREAD_FEATURECOUNTS.out.summary // channel: [ txt ]

deseq2_qc_pdf = ch_deseq2_qc_pdf // channel: [ pdf ]
deseq2_qc_rdata = ch_deseq2_qc_rdata // channel: [ rdata ]
deseq2_qc_rds = ch_deseq2_qc_rds // channel: [ rds ]
deseq2_qc_pca_txt = ch_deseq2_qc_pca_txt // channel: [ txt ]
deseq2_qc_pca_multiqc = ch_deseq2_qc_pca_multiqc // channel: [ txt ]
deseq2_qc_dists_txt = ch_deseq2_qc_dists_txt // channel: [ txt ]
deseq2_qc_dists_multiqc = ch_deseq2_qc_dists_multiqc // channel: [ txt ]
deseq2_qc_log = ch_deseq2_qc_log // channel: [ txt ]
deseq2_qc_size_factors = ch_deseq2_qc_size_factors // channel: [ txt ]

versions = ch_versions // channel: [ versions.yml ]
}
128 changes: 26 additions & 102 deletions workflows/chipseq.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
// MODULE: Loaded from modules/local/
//
include { MACS2_CONSENSUS } from '../modules/local/macs2_consensus'
include { ANNOTATE_BOOLEAN_PEAKS } from '../modules/local/annotate_boolean_peaks'
include { DESEQ2_QC } from '../modules/local/deseq2_qc'
include { IGV } from '../modules/local/igv'
include { MULTIQC } from '../modules/local/multiqc'
Expand All @@ -17,15 +16,16 @@ include { MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS } from '../modules/local/multiqc_c
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_chipseq_pipeline'
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { ALIGN_STAR } from '../subworkflows/local/align_star'
include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools'
include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
include { paramsSummaryMap } from 'plugin/nf-validation'
include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_chipseq_pipeline'
include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { ALIGN_STAR } from '../subworkflows/local/align_star'
include { BAM_FILTER_BAMTOOLS } from '../subworkflows/local/bam_filter_bamtools'
include { BAM_BEDGRAPH_BIGWIG_BEDTOOLS_UCSC } from '../subworkflows/local/bam_bedgraph_bigwig_bedtools_ucsc'
include { BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER } from '../subworkflows/local/bam_peaks_call_qc_annotate_macs2_homer.nf'
include { BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 } from '../subworkflows/local/bed_consensus_quantify_qc_bedtools_featurecounts_deseq2.nf'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -476,63 +476,6 @@ workflow CHIPSEQ {
ch_deseq2_pca_multiqc = Channel.empty()
ch_deseq2_clustering_multiqc = Channel.empty()
if (!params.skip_consensus_peaks) {
// Create channels: [ meta , [ peaks ] ]
// Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ]
BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks
.map {
meta, peak ->
[ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ]
}
.groupTuple()
.map {
antibody, groups, peaks ->
[
antibody,
groups.groupBy().collectEntries { [(it.key) : it.value.size()] },
peaks
]
}
.map {
antibody, groups, peaks ->
def meta_new = [:]
meta_new.id = antibody
meta_new.multiple_groups = groups.size() > 1
meta_new.replicates_exist = groups.max { groups.value }.value > 1
[ meta_new, peaks ]
}
.set { ch_antibody_peaks }

//
// MODULE: Generate consensus peaks across samples
//
MACS2_CONSENSUS (
ch_antibody_peaks,
params.narrow_peak
)
ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed
ch_macs2_consensus_txt_lib = MACS2_CONSENSUS.out.txt
ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions)

if (!params.skip_peak_annotation) {
//
// MODULE: Annotate consensus peaks
//
HOMER_ANNOTATEPEAKS_CONSENSUS (
MACS2_CONSENSUS.out.bed,
ch_fasta,
ch_gtf
)
ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions)

//
// MODULE: Add boolean fields to annotated consensus peaks to aid filtering
//
ANNOTATE_BOOLEAN_PEAKS (
MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0]),
)
ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions)
}

// Create channels: [ antibody, [ ip_bams ] ]
ch_ip_control_bam
.map {
Expand All @@ -542,42 +485,23 @@ workflow CHIPSEQ {
.groupTuple()
.set { ch_antibody_bams }

// Create channels: [ meta, [ ip_bams ], saf ]
MACS2_CONSENSUS
.out
.saf
.map {
meta, saf ->
[ meta.id, meta, saf ]
}
.join(ch_antibody_bams)
.map {
antibody, meta, saf, bams ->
[ meta, bams.flatten().sort(), saf ]
}
.set { ch_saf_bams }

//
// MODULE: Quantify peaks across samples with featureCounts
//
SUBREAD_FEATURECOUNTS (
ch_saf_bams
BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2 (
BAM_PEAKS_CALL_QC_ANNOTATE_MACS2_HOMER.out.peaks,
ch_antibody_bams,
ch_fasta,
ch_gtf,
ch_deseq2_pca_header,
ch_deseq2_clustering_header,
params.narrow_peak,
params.skip_peak_annotation,
params.skip_deseq2_qc
)
ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary
ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first())

if (!params.skip_deseq2_qc) {
//
// MODULE: Generate QC plots with DESeq2
//
DESEQ2_QC (
SUBREAD_FEATURECOUNTS.out.counts,
ch_deseq2_pca_header,
ch_deseq2_clustering_header
)
ch_deseq2_pca_multiqc = DESEQ2_QC.out.pca_multiqc
ch_deseq2_clustering_multiqc = DESEQ2_QC.out.dists_multiqc
}
ch_macs2_consensus_bed_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_bed
ch_macs2_consensus_txt_lib = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.consensus_txt
ch_subreadfeaturecounts_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.featurecounts_summary
ch_deseq2_pca_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_pca_multiqc
ch_deseq2_clustering_multiqc = BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.deseq2_qc_dists_multiqc
ch_versions = ch_versions.mix(BED_CONSENSUS_QUANTIFY_QC_BEDTOOLS_FEATURECOUNTS_DESEQ2.out.versions)
}

//
Expand Down
Loading