From 14c85e64be29435f94ce527d3ad4ee3c163de71a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 6 Jun 2019 16:31:10 +0100 Subject: [PATCH 001/113] Bump version to 1.0.1dev --- .travis.yml | 2 +- Dockerfile | 2 +- environment.yml | 2 +- nextflow.config | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.travis.yml b/.travis.yml index df2d29da..6140ebab 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: - docker pull nfcore/chipseq:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/chipseq:dev nfcore/chipseq:1.0.0 + - docker tag nfcore/chipseq:dev nfcore/chipseq:dev install: # Install Nextflow diff --git a/Dockerfile b/Dockerfile index 42d5a2be..ce625eee 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,4 +4,4 @@ LABEL authors="Philip Ewels" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -ENV PATH /opt/conda/envs/nf-core-chipseq-1.0.0/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-chipseq-1.0.1dev/bin:$PATH diff --git a/environment.yml b/environment.yml index fea6ef21..357755c7 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-chipseq-1.0.0 +name: nf-core-chipseq-1.0.1dev channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index 0ad95086..c1170223 100644 --- a/nextflow.config +++ b/nextflow.config @@ -86,7 +86,7 @@ params { // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev -process.container = 'nfcore/chipseq:1.0.0' +process.container = 'nfcore/chipseq:dev' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -142,7 +142,7 @@ manifest { description = 'ChIP-seq peak-calling and differential analysis pipeline.' mainScript = 'main.nf' nextflowVersion = '>=0.32.0' - version = '1.0.0' + version = '1.0.1dev' } // Function to ensure that resource requirements don't go beyond From 16dfcc68d089507cb4024df07214f9199aa2a9a5 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 6 Jun 2019 16:36:12 +0100 Subject: [PATCH 002/113] Update README.md --- CHANGELOG.md | 9 +++++++++ README.md | 3 ++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d16b9edc..64d3f12d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unpublished Version / DEV] + +### `Added` + +### `Fixed` + +### `Dependencies` + + ## [1.0.0] - 2019-06-06 Initial release of nf-core/chipseq pipeline. diff --git a/README.md b/README.md index 5a6d5f16..27558b76 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) +[![DOI](https://zenodo.org/badge/130877729.svg)](https://zenodo.org/badge/latestdoi/130877729) ## Introduction **nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data. 
@@ -62,7 +63,7 @@ Many thanks to others who have helped out along the way too, including (but not ## Citation - +If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240507](https://doi.org/10.5281/zenodo.3240507) You can cite the `nf-core` pre-print as follows: Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). From c49b5ee674650c699c3de5a2dcc7afd32be254b5 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 27 Jul 2019 09:17:58 +0100 Subject: [PATCH 003/113] Minor update --- docs/usage.md | 1 + main.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 7da137df..82c88005 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -386,6 +386,7 @@ The following options make this easy: | `--skipPlotProfile` | Skip deepTools plotProfile | | `--skipPlotFingerprint` | Skip deepTools plotFingerprint | | `--skipSpp` | Skip Phantompeakqualtools | +| `--skipDANPOS` | Skip DANPOS2 | | `--skipIGV` | Skip IGV | | `--skipMultiQC` | Skip MultiQC | diff --git a/main.nf b/main.nf index f904304d..e841ef01 100755 --- a/main.nf +++ b/main.nf @@ -494,7 +494,7 @@ if (params.skipTrimming){ publishDir "${params.outdir}/trim_galore", mode: 'copy', saveAs: {filename -> if (filename.endsWith(".html")) "fastqc/$filename" - else if (filename.endsWith(".zip")) "fastqc/zip/$filename" + else if (filename.endsWith(".zip")) "fastqc/zips/$filename" else if (filename.endsWith("trimming_report.txt")) "logs/$filename" else params.saveTrimmed ? 
filename : null } From 92cd01599c37519ae89a08959491936b1eff2401 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 20 Aug 2019 11:49:57 +0100 Subject: [PATCH 004/113] Add quote field to read.table --- bin/plot_homer_annotatepeaks.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index 1eb0ec3c..ee537fce 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -57,7 +57,7 @@ plot.feature.dat <- data.frame() for (idx in 1:length(HomerFiles)) { sampleid = SampleIDs[idx] - anno.dat <- read.table(HomerFiles[idx], sep="\t", header=TRUE) + anno.dat <- read.table(HomerFiles[idx], sep="\t", header=TRUE,quote="") anno.dat <- anno.dat[,c("Annotation","Distance.to.TSS","Nearest.PromoterID")] anno.dat <- anno.dat[which(!is.na(anno.dat$Distance.to.TSS)),] if (nrow(anno.dat) == 0) { From 4972b1d13ec7092b1ffc6edefae0e366aecad7db Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 26 Sep 2019 13:38:20 +0100 Subject: [PATCH 005/113] Fixed bug with missing variable name --- bin/check_design.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/check_design.py b/bin/check_design.py index ed87fd6c..751932b0 100755 --- a/bin/check_design.py +++ b/bin/check_design.py @@ -159,7 +159,7 @@ def reformat_design(DesignFile,ReadMappingFile,ControlMappingFile): if not antibodyList in antibodyGroupDict[antibody][group]: antibodyGroupDict[antibody][group].append(antibodyList) else: - print "{}: Control id not a valid group\nControl id: {}, Valid Groups: {}".format(ERROR_STR,groupControlDict[group],sorted(sampleMappingDict.keys())) + print "{}: Control id not a valid group\nControl id: {}, Valid Groups: {}".format(ERROR_STR,control,sorted(sampleMappingDict.keys())) sys.exit(1) fout.close() From ca8f742a7b02cb12b8fcd50b5d7909cd0dd515b3 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 26 Sep 2019 13:38:51 +0100 Subject: [PATCH 006/113] Fix macs2 to macs output directory --- 
docs/output.md | 12 ++++++------ docs/usage.md | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/output.md b/docs/output.md index 91eddfa5..5ad6f35c 100644 --- a/docs/output.md +++ b/docs/output.md @@ -174,11 +174,11 @@ The library-level alignments associated with the same sample are merged and subs `<PEAK_TYPE>` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrowPeak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. Also, the IGV session file and MultiQC reports in the results directory will be overwritten with the latest output so you may want to rename/move these beforehand. *Output directories*: - * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/` * MACS2 output files: `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak` and `*summits.bed`. The files generated will depend on whether MACS2 has been run in *narrowPeak* or *broadPeak* mode. * HOMER peak-to-gene annotation file: `*.annotatePeaks.txt`. - * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/qc/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/qc/` * QC plots for MACS2 peaks: `macs_peak.plots.pdf` * QC plots for peak-to-gene feature annotation: `macs_annotatePeaks.plots.pdf` * MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios: `*.FRiP_mqc.tsv`, `*.count_mqc.tsv` and `macs_annotatePeaks.summary_mqc.tsv` respectively. 
@@ -198,7 +198,7 @@ The library-level alignments associated with the same sample are merged and subs ![R - UpSetR peak intersection plot](images/r_upsetr_intersect_plot.png) *Output directories*: - * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/consensus/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/consensus/` * Consensus peak-set across all samples in `*.bed` format. * Consensus peak-set across all samples in `*.saf` format. Required by featureCounts for read quantification. * HOMER `*.annotatePeaks.txt` peak-to-gene annotation file for consensus peaks. @@ -233,19 +233,19 @@ The library-level alignments associated with the same sample are merged and subs ![R - DESeq2 Volcano plot](images/r_deseq2_volcano_plot.png) *Output directories*: - * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/consensus/<ANTIBODY>/deseq2/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/consensus/<ANTIBODY>/deseq2/` * `.featureCounts.txt` file for read counts across all samples relative to consensus peak-set. * Differential binding `*.results.txt` spreadsheet containing results across all consensus peaks and all comparisons. * `*.plots.pdf` file for PCA and hierarchical clustering. * `*.log` file with information for number of differentially bound intervals at different FDR and fold-change thresholds for each comparison. * `*.dds.rld.RData` file containing R `dds` and `rld` objects generated by DESeq2. * `R_sessionInfo.log` file containing information about R, the OS and attached or loaded packages. - * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/consensus/<ANTIBODY>/<COMPARISON>/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/consensus/<ANTIBODY>/<COMPARISON>/` * `*.results.txt` spreadsheet containing comparison-specific DESeq2 output for differential binding results across all peaks. * Subset of above file for peaks that pass FDR <= 0.01 (`*FDR0.01.results.txt`) and FDR <= 0.05 (`*FDR0.05.results.txt`). * BED files for peaks that pass FDR <= 0.01 (`*FDR0.01.results.bed`) and FDR <= 0.05 (`*FDR0.05.results.bed`). * MA, Volcano, clustering and scatterplots at FDR <= 0.01 and FDR <= 0.05: `*deseq2.plots.pdf`. 
- * `bwa/mergedLibrary/macs2/<PEAK_TYPE>/consensus/<ANTIBODY>/sizeFactors/` + * `bwa/mergedLibrary/macs/<PEAK_TYPE>/consensus/<ANTIBODY>/sizeFactors/` Files containing DESeq2 sizeFactors per sample: `*.txt` and `*.RData`. ## Aggregate analysis diff --git a/docs/usage.md b/docs/usage.md index 82c88005..7da137df 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -386,6 +386,7 @@ The following options make this easy: | `--skipPlotProfile` | Skip deepTools plotProfile | | `--skipPlotFingerprint` | Skip deepTools plotFingerprint | | `--skipSpp` | Skip Phantompeakqualtools | -| `--skipDANPOS` | Skip DANPOS2 | | `--skipIGV` | Skip IGV | | `--skipMultiQC` | Skip MultiQC | From 532cb920f35512d4c0ce2542e39e3d30c85cd857 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 26 Sep 2019 14:13:11 +0100 Subject: [PATCH 007/113] Add bed12 from iGenomes --- conf/igenomes.config | 26 ++++++++++++++++++++++++++ main.nf | 2 +- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index f95c8dd4..7fe643ec 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -14,6 +14,7 @@ fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" macs_gsize = "2.7e9" } @@ -21,6 +22,7 @@ fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" macs_gsize = "2.7e9" } @@ -28,6 
+30,7 @@ fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" macs_gsize = "1.87e9" } @@ -35,110 +38,131 @@ fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" macs_gsize = "9e7" } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" macs_gsize = "1.2e8" } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" gtf = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" gtf = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" macs_gsize = "1.2e7" } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" macs_gsize = "1.21e7" } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" bwa = 
"${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" } 'AGPv3' { fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" macs_gsize = "2.7e9" } @@ -146,6 +170,7 @@ fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" macs_gsize = "2.7e9" } @@ -153,6 +178,7 @@ fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" macs_gsize = "1.87e9" } diff --git a/main.nf b/main.nf 
index e841ef01..bfe205df 100755 --- a/main.nf +++ b/main.nf @@ -114,7 +114,7 @@ if (params.genomes && params.genome && !params.genomes.containsKey(params.genome params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false params.bwa_index = params.genome ? params.genomes[ params.genome ].bwa ?: false : false params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false -params.gene_bed = params.genome ? params.genomes[ params.genome ].gene_bed ?: false : false +params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false : false params.macs_gsize = params.genome ? params.genomes[ params.genome ].macs_gsize ?: false : false params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false From b0092b9895de0e07770fe0950c9d313710188907 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 26 Sep 2019 16:14:15 +0100 Subject: [PATCH 008/113] Add def declarations --- main.nf | 95 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/main.nf b/main.nf index bfe205df..76d4b55f 100755 --- a/main.nf +++ b/main.nf @@ -421,7 +421,7 @@ process makeGenomeFilter { file "*.sizes" into ch_genome_sizes_bigwig // CHROMOSOME SIZES FILE FOR BEDTOOLS script: - blacklist_filter = params.blacklist ? "sortBed -i ${params.blacklist} -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" + def blacklist_filter = params.blacklist ? "sortBed -i ${params.blacklist} -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" """ samtools faidx $fasta cut -f 1,2 ${fasta}.fai > ${fasta}.sizes @@ -509,10 +509,10 @@ if (params.skipTrimming){ script: // Added soft-links to original fastqs for consistent naming in MultiQC - c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - c_r2 = params.clip_r2 > 0 ? 
"--clip_r2 ${params.clip_r2}" : '' - tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' + def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' + def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' + def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' if (params.singleEnd) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz @@ -551,10 +551,9 @@ process bwaMEM { set val(name), file("*.bam") into ch_bwa_bam script: - prefix="${name}.Lb" - if (!params.seq_center) { - rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" - } else { + def prefix="${name}.Lb" + def rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" + if (params.seq_center) { rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" } """ @@ -591,7 +590,7 @@ process sortBAM { file "*.{flagstat,idxstats,stats}" into ch_sort_bam_flagstat_mqc script: - prefix="${name}.Lb" + def prefix="${name}.Lb" """ samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam samtools index ${prefix}.sorted.bam @@ -639,11 +638,11 @@ process mergeBAM { file "*.txt" into ch_merge_bam_metrics_mqc script: - prefix="${name}.mLb.mkD" - bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() + def prefix="${name}.mLb.mkD" + def bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() + def avail_mem = 3 if (!task.memory){ log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." 
- avail_mem = 3 } else { avail_mem = task.memory.toGiga() } @@ -718,12 +717,12 @@ process filterBAM { file "*.{idxstats,stats}" into ch_filter_bam_stats_mqc script: - prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" - filter_params = params.singleEnd ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" - dup_params = params.keepDups ? "" : "-F 0x0400" - multimap_params = params.keepMultiMap ? "" : "-q 1" - blacklist_params = params.blacklist ? "-L $bed" : "" - name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" + def prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" + def filter_params = params.singleEnd ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" + def dup_params = params.keepDups ? "" : "-F 0x0400" + def multimap_params = params.keepMultiMap ? "" : "-q 1" + def blacklist_params = params.blacklist ? "-L $bed" : "" + def name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" """ samtools view \\ $filter_params \\ @@ -788,7 +787,7 @@ if (params.singleEnd){ file "*.{idxstats,stats}" into ch_rm_orphan_stats_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${name}.mLb.clN" + def prefix="${name}.mLb.clN" """ bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs @@ -827,7 +826,7 @@ process preseq { file "*.ccurve.txt" into ch_preseq_results script: - prefix="${name}.mLb.clN" + def prefix="${name}.mLb.clN" """ preseq lc_extrap -v -output ${prefix}.ccurve.txt -bam ${bam[0]} """ @@ -858,10 +857,10 @@ process collectMultipleMetrics { file "*.pdf" into ch_collectmetrics_pdf script: - prefix="${name}.mLb.clN" + def prefix="${name}.mLb.clN" + def avail_mem = 3 if (!task.memory){ log.info "[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." 
- avail_mem = 3 } else { avail_mem = task.memory.toGiga() } @@ -898,9 +897,9 @@ process bigWig { file "*igv.txt" into ch_bigwig_igv script: - prefix="${name}.mLb.clN" - pe_fragment = params.singleEnd ? "" : "-pc" - extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + def prefix="${name}.mLb.clN" + def pe_fragment = params.singleEnd ? "" : "-pc" + def extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -1024,7 +1023,7 @@ process plotFingerprint { file '*.raw.txt' into ch_plotfingerprint_mqc script: - extend = (params.singleEnd && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + def extend = (params.singleEnd && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' """ plotFingerprint \\ --bamfiles ${ipbam[0]} ${controlbam[0]} \\ @@ -1068,10 +1067,10 @@ process macsCallPeak { file "*_mqc.tsv" into ch_macs_mqc script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" - format = params.singleEnd ? "BAM" : "BAMPE" - pileup = params.saveMACSPileup ? "-B --SPMR" : "" + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" + def format = params.singleEnd ? "BAM" : "BAMPE" + def pileup = params.saveMACSPileup ? "-B --SPMR" : "" """ macs2 callpeak \\ -t ${ipbam[0]} \\ @@ -1113,7 +1112,7 @@ process annotatePeaks { file "*.txt" into ch_macs_annotate script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" """ annotatePeaks.pl $peak \\ $fasta \\ @@ -1143,7 +1142,7 @@ process peakQC { file "*.tsv" into ch_macs_qc_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ plot_macs_qc.r -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ @@ -1199,11 +1198,11 @@ process createConsensusPeakSet { file "*igv.txt" into ch_macs_consensus_igv script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') - collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') - expandparam = params.narrowPeak ? "--is_narrow_peak" : "" + def prefix="${antibody}.consensus_peaks" + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') + def collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') + def expandparam = params.narrowPeak ? "--is_narrow_peak" : "" """ sort -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt @@ -1246,8 +1245,8 @@ process annotateConsensusPeakSet { file "*.annotatePeaks.txt" into ch_macs_consensus_annotate script: - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def prefix="${antibody}.consensus_peaks" + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ annotatePeaks.pl $bed \\ $fasta \\ @@ -1300,11 +1299,11 @@ process deseqConsensusPeakSet { file "*.tsv" into ch_macs_consensus_deseq_mqc script: - prefix="${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" - bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" - pe_params = params.singleEnd ? '' : "-p --donotsort" + def prefix="${antibody}.consensus_peaks" + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + def bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() + def bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" + def pe_params = params.singleEnd ? '' : "-p --donotsort" """ featureCounts -F SAF \\ -O \\ @@ -1468,9 +1467,9 @@ process multiqc { file "multiqc_plots" script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - mqcstats = params.skipMultiQCStats ? '--cl_config "skip_generalstats: true"' : '' + def rtitle = custom_runName ? "--title \"$custom_runName\"" : '' + def rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + def mqcstats = params.skipMultiQCStats ? '--cl_config "skip_generalstats: true"' : '' """ multiqc . -f $rtitle $rfilename --config $multiqc_config \\ -m custom_content -m fastqc -m cutadapt -m samtools -m picard -m preseq -m featureCounts -m deeptools -m phantompeakqualtools \\ From 04cbb86e9c810b40f5b3a5b747066b6c15556c6a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 27 Sep 2019 14:20:39 +0100 Subject: [PATCH 009/113] Fix bullets --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 27558b76..69060e63 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. 
-### Pipeline summary +## Pipeline summary 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) @@ -44,7 +44,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). 7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) -### Documentation +## Documentation The nf-core/chipseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: 1. [Installation](https://nf-co.re/usage/installation) From 9378d449dd1d045edd0bd46ca6b183fbf0244438 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 30 Sep 2019 22:15:06 +0100 Subject: [PATCH 010/113] Increase default resources --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index b52774f2..64c7e4a3 100644 --- a/conf/base.config +++ b/conf/base.config @@ -12,7 +12,7 @@ process { cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 2.GB * task.attempt, 'memory' ) } + memory = { check_max( 8.GB * task.attempt, 'memory' ) } time = { check_max( 2.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } From 1cf32a1dc3a6d55d92090597e63e49aeea9f8220 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 30 Sep 2019 22:15:19 +0100 Subject: [PATCH 011/113] Change process names --- main.nf | 57 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/main.nf b/main.nf index 76d4b55f..87c45a95 100755 --- a/main.nf +++ b/main.nf @@ -159,7 +159,7 @@ if (params.design) { ch_design = file(params.design, checkIfExists: true) } e if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "GTF annotation file not specified!" } if (params.gene_bed) { ch_gene_bed = file(params.gene.bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } -if (params.blacklist) { ch_blacklist = file(params.blacklist, checkIfExists: true) } +if (params.blacklist) { ch_blacklist = file(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } if (params.fasta){ lastPath = params.fasta.lastIndexOf(File.separator) @@ -290,7 +290,7 @@ if (!params.macs_gsize){ /* * PREPROCESSING - REFORMAT DESIGN FILE, CHECK VALIDITY & CREATE IP vs CONTROL MAPPINGS */ -process checkDesign { +process CheckDesign { tag "$design" publishDir "${params.outdir}/pipeline_info", mode: 'copy' @@ -341,7 +341,7 @@ if (params.singleEnd) { * PREPROCESSING - Build BWA index */ if (!params.bwa_index){ - process makeBWAindex { + process BWAIndex { tag "$fasta" label 'process_high' publishDir path: { params.saveGenomeIndex ? 
"${params.outdir}/reference_genome" : params.outdir }, @@ -365,7 +365,7 @@ if (!params.bwa_index){ * PREPROCESSING - Generate gene BED file */ if (!params.gene_bed){ - process makeGeneBED { + process MakeGeneBED { tag "$gtf" label 'process_low' publishDir "${params.outdir}/reference_genome", mode: 'copy' @@ -387,7 +387,7 @@ if (!params.gene_bed){ * PREPROCESSING - Generate TSS BED file */ if (!params.tss_bed){ - process makeTSSBED { + process MakeTSSBED { tag "$bed" publishDir "${params.outdir}/reference_genome", mode: 'copy' @@ -407,12 +407,13 @@ if (!params.tss_bed){ /* * PREPROCESSING - Prepare genome intervals for filtering */ -process makeGenomeFilter { +process MakeGenomeFilter { tag "$fasta" publishDir "${params.outdir}/reference_genome", mode: 'copy' input: file fasta from ch_fasta + file blacklist from ch_blacklist output: file "$fasta" into ch_genome_fasta // FASTA FILE FOR IGV @@ -421,7 +422,7 @@ process makeGenomeFilter { file "*.sizes" into ch_genome_sizes_bigwig // CHROMOSOME SIZES FILE FOR BEDTOOLS script: - def blacklist_filter = params.blacklist ? "sortBed -i ${params.blacklist} -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" + def blacklist_filter = params.blacklist ? 
"sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" """ samtools faidx $fasta cut -f 1,2 ${fasta}.fai > ${fasta}.sizes @@ -440,7 +441,7 @@ process makeGenomeFilter { /* * STEP 1 - FastQC */ -process fastqc { +process FastQC { tag "$name" label 'process_medium' publishDir "${params.outdir}/fastqc", mode: 'copy', @@ -488,7 +489,7 @@ if (params.skipTrimming){ ch_trimgalore_results_mqc = [] ch_trimgalore_fastqc_reports_mqc = [] } else { - process trimGalore { + process TrimGalore { tag "$name" label 'process_long' publishDir "${params.outdir}/trim_galore", mode: 'copy', @@ -539,7 +540,7 @@ if (params.skipTrimming){ /* * STEP 3.1 - Align read 1 with bwa */ -process bwaMEM { +process BWAmem { tag "$name" label 'process_high' @@ -570,7 +571,7 @@ process bwaMEM { /* * STEP 3.2 - Convert .bam to coordinate sorted .bam */ -process sortBAM { +process SortBAM { tag "$name" label 'process_medium' if (params.saveAlignedIntermediates) { @@ -616,7 +617,7 @@ ch_sort_bam_merge.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } .map { it -> [ it[0], it[1].flatten() ] } .set { ch_sort_bam_merge } -process mergeBAM { +process MergeBAM { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -692,7 +693,7 @@ process mergeBAM { /* * STEP 4.2 Filter BAM file at merged library-level */ -process filterBAM { +process FilterBAM { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -758,7 +759,7 @@ if (params.singleEnd){ ch_rm_orphan_flagstat_mqc } ch_filter_bam_stats_mqc.set { ch_rm_orphan_stats_mqc } } else { - process rmOrphanReads { + process RemoveOrphans { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -811,7 +812,7 @@ if (params.singleEnd){ /* * STEP 5.1 preseq analysis after merging libraries and before filtering */ -process preseq 
{ +process Preseq { tag "$name" label 'process_low' publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: 'copy' @@ -835,7 +836,7 @@ process preseq { /* * STEP 5.2 Picard CollectMultipleMetrics after merging libraries and filtering */ -process collectMultipleMetrics { +process CollectMultipleMetrics { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -877,7 +878,7 @@ process collectMultipleMetrics { /* * STEP 5.3 Read depth normalised bigWig */ -process bigWig { +process BigWig { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: 'copy', @@ -914,7 +915,7 @@ process bigWig { /* * STEP 5.4 generate gene body coverage plot with deepTools */ -process plotProfile { +process PlotProfile { tag "$name" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: 'copy' @@ -953,7 +954,7 @@ process plotProfile { /* * STEP 5.5 Phantompeakqualtools */ -process phantomPeakQualTools { +process PhantomPeakQualTools { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/phantompeakqualtools", mode: 'copy' @@ -1007,7 +1008,7 @@ ch_design_controls_csv.combine(ch_rm_orphan_bam_macs_1) /* * STEP 6.1 deepTools plotFingerprint */ -process plotFingerprint { +process PlotFingerprint { tag "${ip} vs ${control}" label 'process_high' publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: 'copy' @@ -1042,7 +1043,7 @@ process plotFingerprint { /* * STEP 6.2 Call peaks with MACS2 and calculate FRiP score */ -process macsCallPeak { +process MACSCallPeak { tag "${ip} vs ${control}" label 'process_long' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy', @@ -1095,7 +1096,7 @@ process macsCallPeak { /* * STEP 6.3 Annotate peaks with HOMER */ -process annotatePeaks { +process AnnotatePeaks { tag "${ip} vs ${control}" label 'process_medium' publishDir 
"${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy' @@ -1125,7 +1126,7 @@ process annotatePeaks { /* * STEP 6.4 Aggregated QC plots for peaks, FRiP and peak-to-gene annotation */ -process peakQC { +process PeakQC { label "process_medium" publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/qc", mode: 'copy' @@ -1175,7 +1176,7 @@ ch_macs_consensus.map { it -> [ it[0], it[1], it[2], it[-1] ] } /* * STEP 7.1 Consensus peaks across samples, create boolean filtering file, .saf file for featureCounts and UpSetR plot for intersection */ -process createConsensusPeakSet { +process CreateConsensusPeakSet { tag "${antibody}" label 'process_long' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy', @@ -1227,7 +1228,7 @@ process createConsensusPeakSet { /* * STEP 7.2 Annotate consensus peaks with HOMER, and add annotation to boolean output file */ -process annotateConsensusPeakSet { +process AnnotateConsensusPeakSet { tag "${antibody}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy' @@ -1271,7 +1272,7 @@ ch_group_bam_deseq.map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } /* * STEP 7.3 Count reads in consensus peaks with featureCounts and perform differential analysis with DESeq2 */ -process deseqConsensusPeakSet { +process DeseqConsensusPeakSet { tag "${antibody}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2", mode: 'copy', @@ -1339,7 +1340,7 @@ process deseqConsensusPeakSet { /* * STEP 8 - Create IGV session file */ -process igv { +process IGV { publishDir "${params.outdir}/igv", mode: 'copy' when: @@ -1428,7 +1429,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' Date: Mon, 30 Sep 2019 22:19:30 +0100 Subject: [PATCH 012/113] Add quick start info --- README.md | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 69060e63..4998af02 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,26 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). 7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) +## Quick Start + +i. Install [`nextflow`](https://nf-co.re/usage/installation) + +ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`singularity`](https://www.sylabs.io/guides/3.0/user-guide/) or [`conda`](https://conda.io/miniconda.html) + +iii. Download the pipeline and test it on a minimal dataset with a single command + +```bash +nextflow run nf-core/chipseq -profile test,<docker/singularity/conda> +``` + +iv. Start running your own analysis! + +```bash +nextflow run nf-core/chipseq -profile <docker/singularity/conda> --design design.csv --genome GRCh37 +``` + +See [usage docs](docs/usage.md) for all of the available options when running the pipeline. 
+ ## Documentation The nf-core/chipseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: @@ -63,7 +83,7 @@ Many thanks to others who have helped out along the way too, including (but not ## Citation -If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240507](https://doi.org/10.5281/zenodo.3240507) +If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) You can cite the `nf-core` pre-print as follows: Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). From 562dbb79fdc181138c784bd99f28b398630e914a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 30 Sep 2019 22:21:40 +0100 Subject: [PATCH 013/113] Update Zenodo badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4998af02..4139e692 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) -[![DOI](https://zenodo.org/badge/130877729.svg)](https://zenodo.org/badge/latestdoi/130877729) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3240507.svg)](https://doi.org/10.5281/zenodo.3240507) ## Introduction **nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data. 
From 61b190adf644a2b02bb78a483fdf59a8e49c71a8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 30 Sep 2019 22:59:29 +0100 Subject: [PATCH 014/113] Remove unused blacklists --- assets/blacklists/BDGP6-blacklist.bed | 449 -------------------------- assets/blacklists/ce11-blacklist.bed | 122 ------- 2 files changed, 571 deletions(-) delete mode 100644 assets/blacklists/BDGP6-blacklist.bed delete mode 100644 assets/blacklists/ce11-blacklist.bed diff --git a/assets/blacklists/BDGP6-blacklist.bed b/assets/blacklists/BDGP6-blacklist.bed deleted file mode 100644 index af547148..00000000 --- a/assets/blacklists/BDGP6-blacklist.bed +++ /dev/null @@ -1,449 +0,0 @@ -chr2L 47600 49300 -chr2L 982500 984400 -chr2L 2885500 2887000 -chr2L 4920500 4922400 -chr2L 4937900 4941100 -chr2L 5171400 5177700 -chr2L 6426500 6427500 -chr2L 6992200 6996700 -chr2L 7345200 7350300 -chr2L 8102400 8103400 -chr2L 8729600 8731000 -chr2L 9899400 9902800 -chr2L 9976200 9979800 -chr2L 10422300 10423400 -chr2L 11541098 11541243 -chr2L 11992600 11999400 -chr2L 12558600 12563800 -chr2L 12792200 12794100 -chr2L 13522300 13523300 -chr2L 13650700 13651700 -chr2L 15451900 15452900 -chr2L 16514400 16518200 -chr2L 19576100 19577300 -chr2L 19709600 19711500 -chr2L 20197000 20201100 -chr2L 20458300 20459300 -chr2L 20746500 20747500 -chr2L 21022300 21023500 -chr2L 21416300 21440600 -chr2L 21439805 21440204 -chr2L 21447300 21454900 -chr2L 21482700 21485200 -chr2L 21499300 21500400 -chr2L 21537800 21543500 -chr2L 22202600 22203600 -chr2L 22377700 22389700 -chr2L 22605768 22607668 -chr2L 22650968 22653868 -chr2L 22681568 22682568 -chr2L 22709668 22710768 -chr2L 22768468 22770468 -chr2L 22859468 22860468 -chr2L 22892768 22894668 -chr2L 22916468 22917568 -chr2L 22962568 22963668 -chr2L 23095135 23095208 -chr2L 23100681 23100690 -chr2L 23101135 23101198 -chr2L 23155826 23156826 -chr2L 23267984 23270784 -chr2L 23277785 23278785 -chr2L 23331885 23332885 -chr2L 23382531 23383931 -chr2L 23389332 23390332 -chr2L 23396932 
23397932 -chr2L 23406032 23407032 -chr2L 23492434 23493440 -chr2L 23503240 23504240 -chr2L 23512440 23513640 -chr2L 23512972 23513113 -chr2L 23513325 23513427 -chr2L 23513325 23513486 -chr2R 4869 4931 -chr2R 127649 130567 -chr2R 167144 168244 -chr2R 233935 235174 -chr2R 285375 290597 -chr2R 309683 310085 -chr2R 530880 534580 -chr2R 870395 874286 -chr2R 1453267 1454267 -chr2R 1518376 1519576 -chr2R 1799340 1799396 -chr2R 1942730 1945330 -chr2R 1974030 1975030 -chr2R 2165041 2169842 -chr2R 2465863 2467163 -chr2R 2489337 2491037 -chr2R 2548937 2551737 -chr2R 2583437 2584437 -chr2R 2651936 2653736 -chr2R 2660236 2665036 -chr2R 2861534 2862834 -chr2R 3267123 3268123 -chr2R 3306823 3307923 -chr2R 3414527 3414565 -chr2R 3492859 3492914 -chr2R 3608792 3608825 -chr2R 3698569 3699969 -chr2R 3718236 3718279 -chr2R 3885272 3885921 -chr2R 3991563 3998435 -chr2R 4174889 4175088 -chr2R 4213195 4214295 -chr2R 4253795 4256895 -chr2R 4318395 4319995 -chr2R 4354395 4355395 -chr2R 4372695 4373695 -chr2R 4437995 4444295 -chr2R 4489295 4500295 -chr2R 4680395 4684895 -chr2R 4733295 4734295 -chr2R 4766495 4769695 -chr2R 4810695 4812495 -chr2R 4834495 4835995 -chr2R 4988195 4989295 -chr2R 5005795 5006795 -chr2R 5048995 5056395 -chr2R 5105195 5109995 -chr2R 5221395 5222795 -chr2R 5230795 5231795 -chr2R 5287095 5288095 -chr2R 5393095 5394795 -chr2R 5406695 5407995 -chr2R 5571095 5572195 -chr2R 5652595 5653895 -chr2R 6308795 6314595 -chr2R 6343595 6348695 -chr2R 6389195 6391695 -chr2R 6400195 6402295 -chr2R 6440795 6441895 -chr2R 6453695 6455095 -chr2R 7200195 7201195 -chr2R 7235995 7247295 -chr2R 7826695 7827695 -chr2R 8491725 8491966 -chr2R 8781195 8783195 -chr2R 9727995 9729995 -chr2R 10184695 10185995 -chr2R 10659595 10661495 -chr2R 10950695 10952595 -chr2R 11021795 11023595 -chr2R 11297595 11301895 -chr2R 12481495 12482495 -chr2R 12819595 12822095 -chr2R 13408395 13411595 -chr2R 13728195 13735795 -chr2R 14102395 14106895 -chr2R 14108359 14108584 -chr2R 14173695 14174895 -chr2R 14189095 
14195495 -chr2R 14358795 14361695 -chr2R 14467395 14469295 -chr2R 14891995 14893195 -chr2R 17147995 17152195 -chr2R 17237895 17239695 -chr2R 18371195 18372595 -chr2R 18576595 18579795 -chr2R 18593995 18595995 -chr2R 19729495 19730495 -chr2R 19739895 19743795 -chr2R 19759695 19760795 -chr2R 20779995 20785395 -chr2R 21814295 21816495 -chr2R 22526895 22528295 -chr2R 23365777 23367477 -chr2R 23406677 23407777 -chr2R 23914679 23914711 -chr2R 24183377 24184677 -chr2R 25260089 25261985 -chr3L 1245300 1247200 -chr3L 1425400 1427300 -chr3L 2063900 2069700 -chr3L 3899200 3901900 -chr3L 4361900 4362900 -chr3L 4849900 4850900 -chr3L 5047600 5048600 -chr3L 5104600 5105700 -chr3L 5462900 5464600 -chr3L 6002000 6004400 -chr3L 7248900 7250300 -chr3L 7379500 7380500 -chr3L 7683300 7691600 -chr3L 7795400 7796400 -chr3L 7920700 7921700 -chr3L 8021800 8023900 -chr3L 9083500 9084600 -chr3L 9130628 9130652 -chr3L 9392500 9393500 -chr3L 9576600 9581000 -chr3L 9930000 9937600 -chr3L 11329800 11331500 -chr3L 11507200 11508200 -chr3L 11613100 11619300 -chr3L 11968500 11972400 -chr3L 13579100 13580100 -chr3L 14726800 14728700 -chr3L 14825400 14826600 -chr3L 15296900 15298400 -chr3L 15423800 15426700 -chr3L 15555600 15558000 -chr3L 15825600 15826600 -chr3L 16051400 16053300 -chr3L 16599000 16607700 -chr3L 16685800 16688500 -chr3L 17537103 17537465 -chr3L 17918400 17921100 -chr3L 18529200 18530200 -chr3L 20477700 20483500 -chr3L 20822100 20824700 -chr3L 21374600 21376500 -chr3L 21485000 21486300 -chr3L 21753200 21754400 -chr3L 22099800 22102500 -chr3L 22817900 22819600 -chr3L 23042900 23044100 -chr3L 23140900 23142500 -chr3L 23423400 23424600 -chr3L 23440100 23441100 -chr3L 23497500 23498500 -chr3L 23669300 23675300 -chr3L 23791100 23792200 -chr3L 23826000 23827900 -chr3L 23968000 23971800 -chr3L 24091600 24102100 -chr3L 24169600 24171900 -chr3L 24193900 24195700 -chr3L 24220900 24221900 -chr3L 24370900 24371900 -chr3L 24440900 24442100 -chr3L 24467900 24470200 -chr3L 24502900 24504900 -chr3L 
24544300 24546200 -chr3L 24658803 24659994 -chr3L 24667605 24668905 -chr3L 24794768 24795768 -chr3L 25051756 25052756 -chr3L 25223122 25224722 -chr3L 25288122 25290522 -chr3L 25778255 25778727 -chr3L 25897201 25903001 -chr3L 25963501 25964701 -chr3L 26116482 26117982 -chr3L 26149978 26150978 -chr3L 26610641 26611641 -chr3L 26704569 26706569 -chr3L 27071207 27071367 -chr3L 27079475 27080475 -chr3L 27095375 27101075 -chr3L 27153218 27153246 -chr3L 27429589 27429714 -chr3L 27747393 27748493 -chr3L 27959562 27964651 -chr3L 28076881 28077240 -chr3L 28110068 28110227 -chr3R 2453 2512 -chr3R 21385 21546 -chr3R 84563 84637 -chr3R 141480 141655 -chr3R 141917 141981 -chr3R 173097 173203 -chr3R 231789 231819 -chr3R 1378782 1379782 -chr3R 1781282 1781567 -chr3R 2088173 2089173 -chr3R 2324662 2325662 -chr3R 2698494 2710781 -chr3R 2700868 2701184 -chr3R 2779767 2781467 -chr3R 2792181 2793381 -chr3R 2804364 2805364 -chr3R 2850166 2851266 -chr3R 2927255 2930755 -chr3R 2953255 2959255 -chr3R 3001634 3001890 -chr3R 3022269 3022796 -chr3R 3033708 3033774 -chr3R 3049012 3049261 -chr3R 3129778 3131378 -chr3R 3209582 3210582 -chr3R 3371264 3372364 -chr3R 3498665 3504561 -chr3R 3529261 3571759 -chr3R 3647138 3660653 -chr3R 3698553 3700452 -chr3R 3716162 3730380 -chr3R 3935332 3935391 -chr3R 3951581 3952147 -chr3R 3953905 3954905 -chr3R 4106234 4107734 -chr3R 4231678 4233678 -chr3R 4270678 4277178 -chr3R 4373178 4374178 -chr3R 4753378 4761178 -chr3R 4893878 4894878 -chr3R 5003878 5007178 -chr3R 5047878 5052978 -chr3R 5259478 5260878 -chr3R 5339878 5343378 -chr3R 5822278 5823478 -chr3R 6307578 6308778 -chr3R 6510078 6511078 -chr3R 6820178 6822978 -chr3R 7087178 7088578 -chr3R 7351278 7353178 -chr3R 7700578 7703078 -chr3R 8043778 8044878 -chr3R 8095178 8096278 -chr3R 8403078 8404978 -chr3R 8571178 8573478 -chr3R 8900978 8913778 -chr3R 9691078 9699878 -chr3R 10257478 10259578 -chr3R 10385078 10388078 -chr3R 10960578 10961978 -chr3R 11067078 11068078 -chr3R 11371978 11373278 -chr3R 11669078 
11670378 -chr3R 11841178 11842278 -chr3R 11959878 11960878 -chr3R 12491878 12494478 -chr3R 12499778 12510278 -chr3R 12711878 12713278 -chr3R 13314778 13318878 -chr3R 13978578 13980978 -chr3R 14246478 14247878 -chr3R 15105878 15106878 -chr3R 15133778 15138778 -chr3R 15165678 15166678 -chr3R 15607778 15608778 -chr3R 16988278 16994678 -chr3R 18033778 18037178 -chr3R 18330721 18331078 -chr3R 19029878 19031178 -chr3R 19441578 19442878 -chr3R 19775578 19777178 -chr3R 20111078 20112078 -chr3R 20247878 20248878 -chr3R 20553378 20559778 -chr3R 21300278 21302578 -chr3R 21610678 21611778 -chr3R 22298478 22299478 -chr3R 22450378 22451878 -chr3R 22488578 22489578 -chr3R 22883878 22884878 -chr3R 23533278 23534378 -chr3R 23838978 23843678 -chr3R 23857278 23858278 -chr3R 24077378 24078478 -chr3R 24207178 24211678 -chr3R 25327178 25328678 -chr3R 25398278 25400378 -chr3R 25609678 25610678 -chr3R 26116378 26117378 -chr3R 27096678 27097878 -chr3R 27143378 27145278 -chr3R 28253578 28255478 -chr3R 28758778 28759778 -chr3R 29653278 29654378 -chr3R 29778878 29779978 -chr3R 30238878 30239878 -chr3R 30401278 30403178 -chr3R 31075278 31078578 -chr3R 31331678 31333578 -chr3R 31415678 31417878 -chr3R 31892978 31894778 -chr3R 31980778 31983478 -chr3R 32070078 32073978 -chr4 37874 38874 -chr4 207774 208774 -chr4 413974 414974 -chr4 545274 546274 -chr4 591674 595274 -chr4 789474 790574 -chr4 840174 841374 -chr4 907974 909174 -chr4 1176474 1177474 -chr4 1199976 1200452 -chr4 1279674 1296774 -chr4 1311074 1324574 -chr4 1335974 1337274 -chrM 1473 15268 -chrX 11670 12770 -chrX 109913 120313 -chrX 429167 434567 -chrX 554667 556867 -chrX 564567 565567 -chrX 814167 815167 -chrX 1006467 1007667 -chrX 1365567 1367467 -chrX 1518167 1522767 -chrX 1527551 1527606 -chrX 1933767 1935667 -chrX 1959567 1960767 -chrX 2404967 2406667 -chrX 2611367 2617567 -chrX 3415167 3421167 -chrX 3790367 3793567 -chrX 3798767 3801767 -chrX 3945167 3948067 -chrX 4733667 4735967 -chrX 4926267 4932967 -chrX 4991367 4993367 -chrX 
6383967 6384967 -chrX 7024167 7026667 -chrX 7125367 7127267 -chrX 7480867 7482167 -chrX 8293167 8296667 -chrX 10395567 10396567 -chrX 11099867 11103067 -chrX 11596067 11598067 -chrX 11646239 11646265 -chrX 11890167 11891167 -chrX 12932467 12937767 -chrX 14049067 14050267 -chrX 14056867 14060367 -chrX 14278767 14280567 -chrX 14551667 14556567 -chrX 15795267 15796467 -chrX 16053767 16059467 -chrX 16344868 16345336 -chrX 17115167 17119267 -chrX 19578167 19579667 -chrX 19637067 19638267 -chrX 19943267 19945167 -chrX 20198801 20200406 -chrX 20201693 20202006 -chrX 20841973 20843373 -chrX 21607773 21609673 -chrX 21622773 21623873 -chrX 21653410 21653907 -chrX 21723673 21725673 -chrX 21741673 21743573 -chrX 21963550 21965050 -chrX 22207190 22208582 -chrX 22369467 22371367 -chrX 22432165 22434065 -chrX 23021304 23022282 -chrX 23036082 23037082 -chrX 23056082 23062382 -chrX 23108682 23109682 -chrX 23114821 23115169 -chrX 23184182 23185182 -chrX 23199382 23200382 -chrX 23217282 23218482 -chrX 23291206 23291256 -chrX 23349323 23349358 -chrX 23475208 23475474 -chrY 587886 591286 -chrY 652186 653386 -chrY 734077 735877 -chrY 803659 803847 -chrY 860181 860257 -chrY 969051 969293 -chrY 1423565 1423631 -chrY 2321441 2321732 -chrY 2570084 2570211 -chrY 2848695 2848887 -chrY 3562137 3562433 diff --git a/assets/blacklists/ce11-blacklist.bed b/assets/blacklists/ce11-blacklist.bed deleted file mode 100644 index 8a17aad9..00000000 --- a/assets/blacklists/ce11-blacklist.bed +++ /dev/null @@ -1,122 +0,0 @@ -chrI 932997 934497 -chrI 2542898 2543998 -chrI 3171398 3172598 -chrI 3664797 3666097 -chrI 3989697 3990997 -chrI 4544299 4547499 -chrI 5152597 5153997 -chrI 10130610 10133010 -chrI 10208010 10209110 -chrI 10216310 10219210 -chrI 10266309 10274309 -chrI 10946007 10953107 -chrI 14453012 14454612 -chrI 15059811 15072411 -chrII 0 1000 -chrII 500900 502100 -chrII 694793 696493 -chrII 1452493 1453593 -chrII 2569895 2571395 -chrII 2897396 2898696 -chrII 3465997 3468697 -chrII 3796197 3797498 
-chrII 3941998 3946697 -chrII 3962397 3963397 -chrII 3993897 3994897 -chrII 4284898 4285898 -chrII 4640903 4645003 -chrII 5144709 5146709 -chrII 6506132 6509132 -chrII 7444243 7448843 -chrII 8287450 8292950 -chrII 8975462 8976962 -chrII 9631759 9633259 -chrII 9809659 9824759 -chrII 10335760 10339360 -chrII 12843569 12846169 -chrII 13598570 13600070 -chrII 13939974 13941474 -chrII 13984974 13987074 -chrII 14324176 14326176 -chrII 14336876 14339776 -chrII 14992376 14994276 -chrII 15277076 15279376 -chrIII 414401 415601 -chrIII 930611 932411 -chrIII 1017911 1020111 -chrIII 1269508 1270508 -chrIII 1299408 1302908 -chrIII 2497010 2501110 -chrIII 5353939 5358539 -chrIII 7415865 7417865 -chrIII 7443965 7449265 -chrIII 7594664 7597264 -chrIII 8862681 8864181 -chrIII 10224291 10226191 -chrIII 13778301 13783801 -chrIV 906200 907700 -chrIV 2828302 2830902 -chrIV 3206303 3209503 -chrIV 4416207 4421907 -chrIV 6357711 6361011 -chrIV 6468711 6469811 -chrIV 6698011 6699711 -chrIV 6714311 6724411 -chrIV 7593511 7598311 -chrIV 8572913 8581913 -chrIV 9045815 9049015 -chrIV 10943021 10951221 -chrIV 11070521 11076021 -chrIV 11610823 11612723 -chrIV 11697023 11698023 -chrIV 12024022 12025422 -chrIV 12169322 12170622 -chrIV 12314422 12319522 -chrIV 12730523 12731823 -chrIV 13360424 13362224 -chrIV 13548524 13549924 -chrIV 16963335 16964835 -chrIV 17059735 17062235 -chrV 264299 267299 -chrV 1638000 1639300 -chrV 3098302 3099702 -chrV 3434603 3438803 -chrV 4333314 4336614 -chrV 5073315 5076315 -chrV 5283116 5286116 -chrV 6172117 6178017 -chrV 6939118 6943218 -chrV 7442619 7444819 -chrV 7919720 7925020 -chrV 7988620 7991520 -chrV 8699222 8701922 -chrV 9432724 9435524 -chrV 10606121 10612021 -chrV 12509619 12510919 -chrV 14756415 14757515 -chrV 14766615 14770515 -chrV 16707222 16709422 -chrV 17119724 17132624 -chrV 17308625 17311725 -chrV 17384125 17385825 -chrV 17391225 17394525 -chrV 18400128 18401728 -chrX 109500 114200 -chrX 291200 295300 -chrX 1752205 1755105 -chrX 3007010 3008310 -chrX 
4026023 4051823 -chrX 5056231 5057331 -chrX 5914635 5915835 -chrX 7076944 7079144 -chrX 9186057 9189257 -chrX 9438159 9439559 -chrX 10361560 10367060 -chrX 11785767 11789867 -chrX 11886368 11889068 -chrX 12277168 12278968 -chrX 14388070 14389270 -chrX 14907969 14909769 -chrX 15226969 15228869 -chrX 15807468 15811268 -chrX 16758373 16760073 From 3f63bbd1bcc822f25b9629550465d3b9698d8e0d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 3 Oct 2019 13:51:18 +0100 Subject: [PATCH 015/113] Adjust spacing --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4139e692..d15f5331 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ The nf-core/chipseq pipeline comes with documentation about the pipeline, found 5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) ## Credits + These scripts were orginally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. It has since been re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@drejom](https://github.com/drejom), [@KevinMenden](https://github.com/KevinMenden), [@pditommaso](https://github.com/pditommaso). 
From fe7cc2c1c8f6011e4debb17f89482ee2e2c8d72c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 3 Oct 2019 13:52:20 +0100 Subject: [PATCH 016/113] Update --- conf/base.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/base.config b/conf/base.config index 64c7e4a3..1aee5956 100644 --- a/conf/base.config +++ b/conf/base.config @@ -36,7 +36,7 @@ process { time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 8.h * task.attempt, 'time' ) } + time = { check_max( 12.h * task.attempt, 'time' ) } } withName:get_software_versions { cache = false From 02fbef71a052a5d2eb8b2e33151cc0f56053c227 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 3 Oct 2019 13:52:35 +0100 Subject: [PATCH 017/113] Update with atacseq --- main.nf | 29 ++++++++++++++++------------- nextflow.config | 2 +- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index 87c45a95..709b46d8 100755 --- a/main.nf +++ b/main.nf @@ -21,7 +21,7 @@ def helpMessage() { Mandatory arguments: --design Comma-separated file containing information about the samples in the experiment (see docs/usage.md) --fasta Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome - --gtf Path to GTF file in Ensembl format. Not mandatory when using reference in iGenomes config via --genome + --gtf Path to GTF file. Not mandatory when using reference in iGenomes config via --genome -profile Configuration profile to use. 
Can use multiple (comma separated) Available: conda, docker, singularity, awsbatch, test @@ -577,10 +577,10 @@ process SortBAM { if (params.saveAlignedIntermediates) { publishDir path: "${params.outdir}/bwa/library", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else filename } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else filename } } input: @@ -1045,7 +1045,7 @@ process PlotFingerprint { */ process MACSCallPeak { tag "${ip} vs ${control}" - label 'process_long' + label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy', saveAs: {filename -> if (filename.endsWith(".tsv")) "qc/$filename" @@ -1081,15 +1081,14 @@ process MACSCallPeak { -g ${params.macs_gsize} \\ -n $ip \\ $pileup \\ - --keep-dup all \\ - --nomodel + --keep-dup all cat ${ip}_peaks.${peaktype} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${peaktype} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') grep 'mapped (' $ipflagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${ip}", a/\$1}' | cat $frip_score_header - > ${ip}_peaks.FRiP_mqc.tsv - find * -type f -name "*.${peaktype}" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/"{}"\\t0,0,178" \\; > ${ip}_peaks.${peaktype}.igv.txt + find * -type f -name "*.${peaktype}" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/"{}"\\t0,0,178" \\; > ${ip}_peaks.igv.txt """ } @@ -1145,12 +1144,14 @@ process PeakQC { script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ def peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" """ - plot_macs_qc.r -i ${peaks.join(',')} \\ + plot_macs_qc.r \\ + -i ${peaks.join(',')} \\ -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ -o ./ \\ -p macs_peak - plot_homer_annotatepeaks.r -i ${annos.join(',')} \\ + plot_homer_annotatepeaks.r \\ + -i ${annos.join(',')} \\ -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ -o ./ \\ -p macs_annotatePeaks @@ -1341,7 +1342,7 @@ process DeseqConsensusPeakSet { * STEP 8 - Create IGV session file */ process IGV { - publishDir "${params.outdir}/igv", mode: 'copy' + publishDir "${params.outdir}/igv/${peaktype}", mode: 'copy' when: !params.skipIGV @@ -1357,6 +1358,7 @@ process IGV { file "*.{txt,xml}" into ch_igv_session script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ + def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../reference_genome/${fasta.getName()} --path_prefix '../' @@ -1430,7 +1432,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' Date: Thu, 3 Oct 2019 18:37:45 +0100 Subject: [PATCH 018/113] Minor updates --- main.nf | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 709b46d8..de502f5e 100755 --- a/main.nf +++ b/main.nf @@ -46,7 +46,7 @@ def helpMessage() { --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) --skipTrimming Skip the adapter trimming step - --saveTrimmed Save the trimmed FastQ files in the the results directory + --saveTrimmed Save the trimmed FastQ files in the results directory Alignments --keepDups Duplicate reads are not filtered from alignments @@ -413,7 +413,7 @@ process MakeGenomeFilter { input: file fasta from ch_fasta - file blacklist from ch_blacklist + file blacklist from ch_blacklist.ifEmpty([]) output: file "$fasta" into ch_genome_fasta // FASTA FILE FOR IGV @@ -1361,7 +1361,7 @@ process IGV { def peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" """ cat *.txt > igv_files.txt - igv_files_to_session.py igv_session.xml igv_files.txt ../reference_genome/${fasta.getName()} --path_prefix '../' + igv_files_to_session.py igv_session.xml igv_files.txt ../../reference_genome/${fasta.getName()} --path_prefix '../../' """ } @@ -1440,9 +1440,12 @@ process MultiQC { input: file multiqc_config from ch_multiqc_config - file ('fastqc/*') from ch_fastqc_reports_mqc.collect() - file ('trimgalore/*') from ch_trimgalore_results_mqc.collect() - file ('trimgalore/fastqc/*') from ch_trimgalore_fastqc_reports_mqc.collect() + file ('software_versions/*') from ch_software_versions_mqc.collect() + file ('workflow_summary/*') from create_workflow_summary(summary) + + file ('fastqc/*') from ch_fastqc_reports_mqc.collect().ifEmpty([]) + file ('trimgalore/*') from ch_trimgalore_results_mqc.collect().ifEmpty([]) + file ('trimgalore/fastqc/*') from ch_trimgalore_fastqc_reports_mqc.collect().ifEmpty([]) file ('alignment/library/*') from ch_sort_bam_flagstat_mqc.collect() file ('alignment/mergedLibrary/*') from ch_merge_bam_stats_mqc.collect() @@ -1461,8 +1464,6 @@ process MultiQC { file ('deeptools/*') from ch_plotprofile_mqc.collect().ifEmpty([]) file ('phantompeakqualtools/*') from ch_spp_out_mqc.collect().ifEmpty([]) file ('phantompeakqualtools/*') from ch_spp_csv_mqc.collect().ifEmpty([]) - file ('software_versions/*') from ch_software_versions_mqc.collect() - file ('workflow_summary/*') from create_workflow_summary(summary) output: file "*multiqc_report.html" into ch_multiqc_report From 3d55800353f287f6414f4b5a1e9a00939b92d420 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 09:33:26 +0100 Subject: [PATCH 019/113] Remove def where required --- main.nf | 94 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/main.nf b/main.nf index de502f5e..2cb2cc5f 100755 --- a/main.nf +++ b/main.nf @@ -422,7 +422,7 @@ process MakeGenomeFilter 
{ file "*.sizes" into ch_genome_sizes_bigwig // CHROMOSOME SIZES FILE FOR BEDTOOLS script: - def blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" + blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" """ samtools faidx $fasta cut -f 1,2 ${fasta}.fai > ${fasta}.sizes @@ -510,10 +510,10 @@ if (params.skipTrimming){ script: // Added soft-links to original fastqs for consistent naming in MultiQC - def c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' - def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' + c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' + tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' + tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' if (params.singleEnd) { """ [ ! 
-f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz @@ -552,8 +552,8 @@ process BWAmem { set val(name), file("*.bam") into ch_bwa_bam script: - def prefix="${name}.Lb" - def rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" + prefix="${name}.Lb" + rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" if (params.seq_center) { rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" } @@ -591,7 +591,7 @@ process SortBAM { file "*.{flagstat,idxstats,stats}" into ch_sort_bam_flagstat_mqc script: - def prefix="${name}.Lb" + prefix="${name}.Lb" """ samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam samtools index ${prefix}.sorted.bam @@ -639,8 +639,8 @@ process MergeBAM { file "*.txt" into ch_merge_bam_metrics_mqc script: - def prefix="${name}.mLb.mkD" - def bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() + prefix="${name}.mLb.mkD" + bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() def avail_mem = 3 if (!task.memory){ log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." @@ -718,12 +718,12 @@ process FilterBAM { file "*.{idxstats,stats}" into ch_filter_bam_stats_mqc script: - def prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" - def filter_params = params.singleEnd ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" - def dup_params = params.keepDups ? "" : "-F 0x0400" - def multimap_params = params.keepMultiMap ? "" : "-q 1" - def blacklist_params = params.blacklist ? "-L $bed" : "" - def name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" + prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" + filter_params = params.singleEnd ? 
"-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" + dup_params = params.keepDups ? "" : "-F 0x0400" + multimap_params = params.keepMultiMap ? "" : "-q 1" + blacklist_params = params.blacklist ? "-L $bed" : "" + name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" """ samtools view \\ $filter_params \\ @@ -788,7 +788,7 @@ if (params.singleEnd){ file "*.{idxstats,stats}" into ch_rm_orphan_stats_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - def prefix="${name}.mLb.clN" + prefix="${name}.mLb.clN" """ bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs @@ -827,7 +827,7 @@ process Preseq { file "*.ccurve.txt" into ch_preseq_results script: - def prefix="${name}.mLb.clN" + prefix="${name}.mLb.clN" """ preseq lc_extrap -v -output ${prefix}.ccurve.txt -bam ${bam[0]} """ @@ -858,7 +858,7 @@ process CollectMultipleMetrics { file "*.pdf" into ch_collectmetrics_pdf script: - def prefix="${name}.mLb.clN" + prefix="${name}.mLb.clN" def avail_mem = 3 if (!task.memory){ log.info "[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." @@ -898,9 +898,9 @@ process BigWig { file "*igv.txt" into ch_bigwig_igv script: - def prefix="${name}.mLb.clN" - def pe_fragment = params.singleEnd ? "" : "-pc" - def extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + prefix="${name}.mLb.clN" + pe_fragment = params.singleEnd ? "" : "-pc" + extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -1024,7 +1024,7 @@ process PlotFingerprint { file '*.raw.txt' into ch_plotfingerprint_mqc script: - def extend = (params.singleEnd && params.fragment_size > 0) ? 
"--extendReads ${params.fragment_size}" : '' + extend = (params.singleEnd && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' """ plotFingerprint \\ --bamfiles ${ipbam[0]} ${controlbam[0]} \\ @@ -1068,10 +1068,10 @@ process MACSCallPeak { file "*_mqc.tsv" into ch_macs_mqc script: - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - def broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" - def format = params.singleEnd ? "BAM" : "BAMPE" - def pileup = params.saveMACSPileup ? "-B --SPMR" : "" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" + format = params.singleEnd ? "BAM" : "BAMPE" + pileup = params.saveMACSPileup ? "-B --SPMR" : "" """ macs2 callpeak \\ -t ${ipbam[0]} \\ @@ -1112,7 +1112,7 @@ process AnnotatePeaks { file "*.txt" into ch_macs_annotate script: - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ annotatePeaks.pl $peak \\ $fasta \\ @@ -1142,7 +1142,7 @@ process PeakQC { file "*.tsv" into ch_macs_qc_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ plot_macs_qc.r \\ -i ${peaks.join(',')} \\ @@ -1200,11 +1200,11 @@ process CreateConsensusPeakSet { file "*igv.txt" into ch_macs_consensus_igv script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - def prefix="${antibody}.consensus_peaks" - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - def mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') - def collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') - def expandparam = params.narrowPeak ? 
"--is_narrow_peak" : "" + prefix="${antibody}.consensus_peaks" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') + collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') + expandparam = params.narrowPeak ? "--is_narrow_peak" : "" """ sort -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt @@ -1247,8 +1247,8 @@ process AnnotateConsensusPeakSet { file "*.annotatePeaks.txt" into ch_macs_consensus_annotate script: - def prefix="${antibody}.consensus_peaks" - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + prefix="${antibody}.consensus_peaks" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ annotatePeaks.pl $bed \\ $fasta \\ @@ -1301,11 +1301,11 @@ process DeseqConsensusPeakSet { file "*.tsv" into ch_macs_consensus_deseq_mqc script: - def prefix="${antibody}.consensus_peaks" - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - def bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - def bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" - def pe_params = params.singleEnd ? '' : "-p --donotsort" + prefix="${antibody}.consensus_peaks" + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() + bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" + pe_params = params.singleEnd ? '' : "-p --donotsort" """ featureCounts -F SAF \\ -O \\ @@ -1358,7 +1358,7 @@ process IGV { file "*.{txt,xml}" into ch_igv_session script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" """ cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../reference_genome/${fasta.getName()} --path_prefix '../../' @@ -1442,7 +1442,7 @@ process MultiQC { file ('software_versions/*') from ch_software_versions_mqc.collect() file ('workflow_summary/*') from create_workflow_summary(summary) - + file ('fastqc/*') from ch_fastqc_reports_mqc.collect().ifEmpty([]) file ('trimgalore/*') from ch_trimgalore_results_mqc.collect().ifEmpty([]) file ('trimgalore/fastqc/*') from ch_trimgalore_fastqc_reports_mqc.collect().ifEmpty([]) @@ -1471,10 +1471,10 @@ process MultiQC { file "multiqc_plots" script: - def peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - def rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - def rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - def mqcstats = params.skipMultiQCStats ? '--cl_config "skip_generalstats: true"' : '' + peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + rtitle = custom_runName ? "--title \"$custom_runName\"" : '' + rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' + mqcstats = params.skipMultiQCStats ? '--cl_config "skip_generalstats: true"' : '' """ multiqc . 
-f $rtitle $rfilename --config $multiqc_config \\ -m custom_content -m fastqc -m cutadapt -m samtools -m picard -m preseq -m featureCounts -m deeptools -m phantompeakqualtools \\ From 8f9d1eb23d71386f35d295e6a40089d098d7aabe Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 09:38:02 +0100 Subject: [PATCH 020/113] Change fragment size parameter back --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 2b1906d1..c1170223 100644 --- a/nextflow.config +++ b/nextflow.config @@ -11,7 +11,7 @@ params { // Options: Generic singleEnd = false seq_center = false - fragment_size = 0 + fragment_size = 200 fingerprint_bins = 500000 // Options: References From 9f81410e9e772b81624eec49ff88a5c020674593 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 09:43:38 +0100 Subject: [PATCH 021/113] Update CHANGELOG --- CHANGELOG.md | 9 +++++++++ main.nf | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 64d3f12d..76db6536 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,17 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` +* [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config +* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly +* Capitalised process names +* Add quick start information to main README + ### `Fixed` +* [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 +* Increase default resource requirements in `base.config` +* Increase process-specific requirements based on user-reported failures + ### `Dependencies` diff --git a/main.nf b/main.nf index 2cb2cc5f..a66d9f4d 100755 --- a/main.nf +++ b/main.nf @@ -581,7 +581,7 @@ process SortBAM { else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" else if (filename.endsWith(".stats")) "samtools_stats/$filename" else filename } - } + } input: set val(name), file(bam) from ch_bwa_bam From 9be6a3128f646e7933acfd158f081551201302a9 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 10:00:36 +0100 Subject: [PATCH 022/113] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76db6536..a39267ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` * [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config -* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * Capitalised process names * Add quick start information to main README ### `Fixed` +* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures From 3369c5f106356dd2c980c7164f9d74f861f588eb Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 14:01:59 +0100 Subject: [PATCH 023/113] Bump travis --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index a66d9f4d..408bc7a3 100755 --- a/main.nf +++ b/main.nf @@ -345,7 +345,7 @@ if (!params.bwa_index){ tag "$fasta" label 'process_high' publishDir path: { params.saveGenomeIndex ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveGenomeIndex ? it : null }, mode: 'copy' + saveAs: { params.saveGenomeIndex ? 
it : null }, mode: 'copy' input: file fasta from ch_fasta From a9f258be0394ddb1f8a3ef2a12968695f715e4be Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 4 Oct 2019 18:16:45 +0100 Subject: [PATCH 024/113] Change process names --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 408bc7a3..018061ac 100755 --- a/main.nf +++ b/main.nf @@ -693,7 +693,7 @@ process MergeBAM { /* * STEP 4.2 Filter BAM file at merged library-level */ -process FilterBAM { +process MergeBAMFilter { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -759,7 +759,7 @@ if (params.singleEnd){ ch_rm_orphan_flagstat_mqc } ch_filter_bam_stats_mqc.set { ch_rm_orphan_stats_mqc } } else { - process RemoveOrphans { + process MergeBAMRemoveOrphan { tag "$name" label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', @@ -1177,7 +1177,7 @@ ch_macs_consensus.map { it -> [ it[0], it[1], it[2], it[-1] ] } /* * STEP 7.1 Consensus peaks across samples, create boolean filtering file, .saf file for featureCounts and UpSetR plot for intersection */ -process CreateConsensusPeakSet { +process ConsensusPeakSet { tag "${antibody}" label 'process_long' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy', @@ -1229,7 +1229,7 @@ process CreateConsensusPeakSet { /* * STEP 7.2 Annotate consensus peaks with HOMER, and add annotation to boolean output file */ -process AnnotateConsensusPeakSet { +process ConsensusPeakSetAnnotate { tag "${antibody}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy' @@ -1273,7 +1273,7 @@ ch_group_bam_deseq.map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } /* * STEP 7.3 Count reads in consensus peaks with featureCounts and perform differential analysis with DESeq2 */ -process DeseqConsensusPeakSet { +process 
ConsensusPeakSetDESeq { tag "${antibody}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2", mode: 'copy', From f040ba96cfbbb42244e610b708efb75e326e5886 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 7 Oct 2019 10:39:40 +0100 Subject: [PATCH 025/113] Update gene bed bug --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 018061ac..9ebffc96 100755 --- a/main.nf +++ b/main.nf @@ -157,7 +157,7 @@ ch_spp_rsc_header = file("$baseDir/assets/multiqc/spp_rsc_header.txt", checkIfEx // Validate inputs if (params.design) { ch_design = file(params.design, checkIfExists: true) } else { exit 1, "Samples design file not specified!" } if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "GTF annotation file not specified!" } -if (params.gene_bed) { ch_gene_bed = file(params.gene.bed, checkIfExists: true) } +if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } if (params.blacklist) { ch_blacklist = file(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } From a628c4a3585ef9b137a5c8dd56f72df77ce626a4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:38:49 +0100 Subject: [PATCH 026/113] Update contributing --- .github/CONTRIBUTING.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index fef25faa..2662b570 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -6,7 +6,7 @@ We try to manage the required tasks for nf-core/chipseq using GitHub issues, you However, don't be put off by this template - other more general issues and suggestions are welcome! 
Contributions to the code are even more welcome ;) -> If you need help using or modifying nf-core/chipseq then the best place to ask is on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +> If you need help using or modifying nf-core/chipseq then the best place to ask is on the pipeline channel on [Slack](https://nf-co.re/join/slack/). @@ -32,7 +32,7 @@ Typically, pull-requests are only fully reviewed when these tests are passing, t There are typically two types of tests that run: ### Lint Tests -The nf-core has a [set of guidelines](http://nf-co.re/guidelines) which all pipelines must adhere to. +The nf-core has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. @@ -44,4 +44,4 @@ If there are any failures then the automated tests fail. These tests are run both with the latest available version of Nextflow and also the minimum required version that is stated in the pipeline code. ## Getting help -For further information/help, please consult the [nf-core/chipseq documentation](https://github.com/nf-core/chipseq#documentation) and don't hesitate to get in touch on the pipeline channel on [Slack](https://nf-core-invite.herokuapp.com/). +For further information/help, please consult the [nf-core/chipseq documentation](https://github.com/nf-core/chipseq#documentation) and don't hesitate to get in touch on the [nf-core/chipseq pipeline channel](https://nfcore.slack.com/channels/chipseq) on [Slack](https://nf-co.re/join/slack/). 
From bc26442340087e195a228454919b22477edadcd4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:39:10 +0100 Subject: [PATCH 027/113] Update nextflow.config --- .travis.yml | 2 +- Dockerfile | 2 +- nextflow.config | 44 ++++++++++++++++++++++++-------------------- 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6140ebab..6b1fd10c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,7 @@ matrix: before_install: # PRs to master are only ok if coming from dev branch - - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && [ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ])' + - '[ $TRAVIS_PULL_REQUEST = "false" ] || [ $TRAVIS_BRANCH != "master" ] || ([ $TRAVIS_PULL_REQUEST_SLUG = $TRAVIS_REPO_SLUG ] && ([ $TRAVIS_PULL_REQUEST_BRANCH = "dev" ] || [ $TRAVIS_PULL_REQUEST_BRANCH = "patch" ]))' # Pull the docker image first so the test doesn't wait for this - docker pull nfcore/chipseq:dev # Fake the tag locally so that the pipeline runs properly diff --git a/Dockerfile b/Dockerfile index ce625eee..e1fc9efe 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM nfcore/base +FROM nfcore/base:1.7 LABEL authors="Philip Ewels" \ description="Docker image containing all requirements for nf-core/chipseq pipeline" diff --git a/nextflow.config b/nextflow.config index c1170223..0d9c60a6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -50,15 +50,6 @@ params { skipMultiQC = false skipMultiQCStats = false - // Options: Other - outdir = './results' - igenomes_base = "./iGenomes" - email = false - plaintext_email = false - monochrome_logs = false - name = false - maxMultiqcEmailFileSize = 25.MB - // Options: AWSBatch awsqueue = false awsregion = 'eu-west-1' @@ -69,18 +60,26 @@ params { bamtools_filter_se_config = "$baseDir/assets/bamtools_filter_se.json" // Options: Custom config + custom_config_version = 'master' + custom_config_base = 
"https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" config_profile_description = false config_profile_contact = false config_profile_url = false - // Options: Default + // Options: Other help = false - hostnames = false + outdir = './results' + igenomes_base = "./iGenomes" + igenomesIgnore = false + maxMultiqcEmailFileSize = 25.MB tracedir = "${params.outdir}/pipeline_info" + email = false + email_on_fail = false + plaintext_email = false + monochrome_logs = false + name = false + hostnames = false clusterOptions = false - igenomesIgnore = false - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" } @@ -107,8 +106,13 @@ profiles { test { includeConfig 'conf/test.config' } } +// Avoid this error: +// WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. +// Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351, once this is established and works well, nextflow might implement this behavior as new default. +docker.runOptions = '-u \$(id -u):\$(id -g)' + // Load igenomes.config if required -if(!params.igenomesIgnore){ +if (!params.igenomesIgnore) { includeConfig 'conf/igenomes.config' } @@ -148,9 +152,9 @@ manifest { // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if(type == 'memory'){ + if (type == 'memory') { try { - if(obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) return params.max_memory as nextflow.util.MemoryUnit else return obj @@ -158,9 +162,9 @@ def check_max(obj, type) { println " ### ERROR ### Max memory '${params.max_memory}' is not valid! 
Using default value: $obj" return obj } - } else if(type == 'time'){ + } else if (type == 'time') { try { - if(obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) return params.max_time as nextflow.util.Duration else return obj @@ -168,7 +172,7 @@ def check_max(obj, type) { println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" return obj } - } else if(type == 'cpus'){ + } else if (type == 'cpus') { try { return Math.min( obj, params.max_cpus as int ) } catch (all) { From 7dfe5aac117f52091229014965be4e5db02a2001 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:39:35 +0100 Subject: [PATCH 028/113] Update logo names --- ...re-chipseq_logo.png => nf-core-chipseq_logo.png} | Bin ...re-chipseq_logo.png => nf-core-chipseq_logo.png} | Bin 2 files changed, 0 insertions(+), 0 deletions(-) rename assets/{nfcore-chipseq_logo.png => nf-core-chipseq_logo.png} (100%) rename docs/images/{nfcore-chipseq_logo.png => nf-core-chipseq_logo.png} (100%) diff --git a/assets/nfcore-chipseq_logo.png b/assets/nf-core-chipseq_logo.png similarity index 100% rename from assets/nfcore-chipseq_logo.png rename to assets/nf-core-chipseq_logo.png diff --git a/docs/images/nfcore-chipseq_logo.png b/docs/images/nf-core-chipseq_logo.png similarity index 100% rename from docs/images/nfcore-chipseq_logo.png rename to docs/images/nf-core-chipseq_logo.png From 2c93d4d7a71da4433fdcbdcf67007c64bb5cbc5f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:39:59 +0100 Subject: [PATCH 029/113] Update email files --- assets/email_template.html | 2 +- assets/email_template.txt | 12 +++++++++--- assets/sendmail_template.txt | 8 ++++---- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/assets/email_template.html b/assets/email_template.html index 30127d9a..a3f19225 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -11,7 +11,7 @@
- +

nf-core/chipseq v${version}

Run Name: $runName

diff --git a/assets/email_template.txt b/assets/email_template.txt index 25f87153..6d35a697 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -1,6 +1,12 @@ -======================================== - nf-core/chipseq v${version} -======================================== +---------------------------------------------------- + ,--./,-. + ___ __ __ __ ___ /,-._.--~\\ + |\\ | |__ __ / ` / \\ |__) |__ } { + | \\| | \\__, \\__/ | \\ |___ \\`-._,-`-, + `._,._,' + nf-core/chipseq v${version} +---------------------------------------------------- + Run Name: $runName <% if (success){ diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index b316b04d..0042bf1d 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -9,12 +9,12 @@ Content-Type: text/html; charset=utf-8 $email_html --nfcoremimeboundary -Content-Type: image/png;name="nfcore-chipseq_logo.png" +Content-Type: image/png;name="nf-core-chipseq_logo.png" Content-Transfer-Encoding: base64 -Content-ID: -Content-Disposition: inline; filename="nfcore-chipseq_logo.png" +Content-ID: +Content-Disposition: inline; filename="nf-core-chipseq_logo.png" -<% out << new File("$baseDir/assets/nfcore-chipseq_logo.png"). +<% out << new File("$baseDir/assets/nf-core-chipseq_logo.png"). bytes. encodeBase64(). toString(). 
From 0930c54f566f0864182fdaba534e0c7ef9f74983 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:40:10 +0100 Subject: [PATCH 030/113] Update software versions --- bin/scrape_software_versions.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 2640bb4f..4e8dfbcd 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -44,12 +44,15 @@ # Search each file using its regex for k, v in regexes.items(): - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - + try: + with open(v[0]) as x: + versions = x.read() + match = re.search(v[1], versions) + if match: + results[k] = "v{}".format(match.group(1)) + except IOError: + results[k] = False + # Remove software set to false in results for k in results: if not results[k]: From f03fa1d548c7e376d995746213a0dbccbc6bcf2d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:40:21 +0100 Subject: [PATCH 031/113] Update README --- CODE_OF_CONDUCT.md | 2 +- README.md | 14 +++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 09226d0d..1cda7600 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -34,7 +34,7 @@ This Code of Conduct applies both within project spaces and in public spaces whe ## Enforcement -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-core-invite.herokuapp.com/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack/). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. diff --git a/README.md b/README.md index d15f5331..4c7b78f4 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ![nf-core/chipseq](docs/images/nfcore-chipseq_logo.png) +# ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) [![Build Status](https://travis-ci.org/nf-core/chipseq.svg?branch=master)](https://travis-ci.org/nf-core/chipseq) [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) @@ -8,6 +8,7 @@ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3240507.svg)](https://doi.org/10.5281/zenodo.3240507) ## Introduction + **nfcore/chipseq** is a bioinformatics analysis pipeline used for Chromatin ImmunopreciPitation sequencing (ChIP-seq) data. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. @@ -65,6 +66,7 @@ nextflow run nf-core/chipseq -profile --design design See [usage docs](docs/usage.md) for all of the available options when running the pipeline. ## Documentation + The nf-core/chipseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: 1. 
[Installation](https://nf-co.re/usage/installation) @@ -80,11 +82,17 @@ The nf-core/chipseq pipeline comes with documentation about the pipeline, found These scripts were orginally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. It has since been re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. -Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@drejom](https://github.com/drejom), [@KevinMenden](https://github.com/KevinMenden), [@pditommaso](https://github.com/pditommaso). +Many thanks to others who have helped out along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@drejom](https://github.com/drejom), [@KevinMenden](https://github.com/KevinMenden), [@crickbabs](https://github.com/crickbabs), [@pditommaso](https://github.com/pditommaso). + +## Contributions and Support + +If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md). + +For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/chipseq) (you can join with [this invite](https://nf-co.re/join/slack)). 
## Citation If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) You can cite the `nf-core` pre-print as follows: -Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). +> Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). From ab3b55913826086291adcdb5b916d5958161d125 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:40:39 +0100 Subject: [PATCH 032/113] Update base config --- conf/base.config | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/conf/base.config b/conf/base.config index 1aee5956..12f1cff4 100644 --- a/conf/base.config +++ b/conf/base.config @@ -12,8 +12,8 @@ process { cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 8.GB * task.attempt, 'memory' ) } - time = { check_max( 2.h * task.attempt, 'time' ) } + memory = { check_max( 7.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } maxRetries = 1 @@ -22,29 +22,30 @@ process { // Process-specific resource requirements withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + memory = { check_max( 14.GB * task.attempt, 'memory' ) } + time = { check_max( 6.h * task.attempt, 'time' ) } } withLabel:process_medium { cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 48.GB * task.attempt, 'memory' ) } + memory = { check_max( 42.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } } withLabel:process_high { cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 96.GB * task.attempt, 'memory' ) } + memory = { check_max( 84.GB * task.attempt, 'memory' ) } time = { check_max( 16.h * task.attempt, 'time' ) } } withLabel:process_long { - time = { check_max( 12.h * task.attempt, 'time' ) } + time = { check_max( 20.h * task.attempt, 'time' ) } } withName:get_software_versions { cache = false } + } -// Defaults config params, may be overwritten by later configs params { + // Defaults only, expecting to be overwritten max_memory = 128.GB max_cpus = 16 max_time = 240.h From 35c81e0dfe9f96f12cbaa47ef1a25750d6b5eef1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:40:47 +0100 Subject: [PATCH 033/113] Update igenomes --- conf/igenomes.config | 561 +++++++++++++++++++++++++++++-------------- 1 file changed, 386 insertions(+), 175 deletions(-) diff --git a/conf/igenomes.config b/conf/igenomes.config index 7fe643ec..37217cf4 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -7,180 +7,391 @@ * path using $params.igenomes_base / --igenomes_base */ - params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" - macs_gsize = "2.7e9" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - macs_gsize = "2.7e9" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" - macs_gsize = "1.87e9" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - gtf = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - } - 'Mmul_1' { - fasta = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - gtf = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - macs_gsize = "2.7e9" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" - macs_gsize = "2.7e9" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" - macs_gsize = "1.87e9" - } +params { + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" + } + 'GRCh38' { + 
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" + } + 'TAIR10' { + fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = "9e7" + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = "1.2e8" + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + } + 'Galgal4' { + fasta = 
"${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'IRGSP-1.0' { + fasta = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.2e7" + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = "1.21e7" + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + mito_name = "MT" + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" + bowtie2 = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "2.7e9" + blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.87e9" + blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" + } + 'bosTau8' { + fasta = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "9e7" + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" 
+ mito_name = "chrM" + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = "1.2e8" + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + mito_name = "chrM" + macs_gsize = "1.2e7" + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + mito_name = "chrM" + } } } From 6eb77af5acff4e676e491bab82a91b5e76f77edc Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:40:58 +0100 Subject: [PATCH 034/113] Update docs --- docs/usage.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7da137df..8e2a3103 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -51,6 +51,7 @@ * [Other command line parameters](#other-command-line-parameters) * [`--outdir`](#--outdir) * [`--email`](#--email) + * [`--email_on_fail`](#--email_on_fail) * [`-name`](#-name) * [`-resume`](#-resume) * [`-c`](#-c) @@ -110,7 +111,7 @@ This version number will be logged in reports when you run the pipeline, so 
that ## Main arguments ### `-profile` -Use this parameter to choose a configuration profile. Profiles can give configuration pre-sets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. @@ -400,7 +401,7 @@ Wherever process-specific requirements are set in the pipeline, the default valu If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -If you have any questions or issues please send us a message on [Slack](https://nf-core-invite.herokuapp.com/). +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack/). ## AWS Batch specific parameters Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. 
@@ -419,6 +420,9 @@ The output directory where the results will be saved. ### `--email` Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. +### `--email_on_fail` +This works exactly as with `--email`, except emails are only sent if the workflow is not successful. + ### `-name` Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. From f6797857ffde58be042f95aba5ffa87e6d3ec7ed Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 16:41:08 +0100 Subject: [PATCH 035/113] Update main script --- main.nf | 55 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/main.nf b/main.nf index 9ebffc96..29563b65 100755 --- a/main.nf +++ b/main.nf @@ -74,6 +74,7 @@ def helpMessage() { Other --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --email_on_fail Same as --email, except only send mail if the workflow is not successful --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic @@ -182,7 +183,7 @@ if (params.bwa_index){ /* -- AWS -- */ //////////////////////////////////////////////////// -if( workflow.profile == 'awsbatch') { +if ( workflow.profile == 'awsbatch') { // AWSBatch sanity checking if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" 
// Check outdir paths to be S3 buckets if running on AWSBatch @@ -245,7 +246,7 @@ if (params.skipIGV) summary['Skip IGV'] = 'Yes' if (params.skipMultiQC) summary['Skip MultiQC'] = 'Yes' if (params.skipMultiQCStats) summary['Skip MultiQC Stats'] = 'Yes' summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if(workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" +if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" summary['Output Dir'] = params.outdir summary['Launch Dir'] = workflow.launchDir summary['Working Dir'] = workflow.workDir @@ -259,8 +260,9 @@ summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if(params.email) { +if (params.email || params.email_on_fail) { summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail summary['MultiQC Max Size'] = params.maxMultiqcEmailFileSize } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") @@ -1515,7 +1517,7 @@ workflow.onComplete { // Set up the e-mail variables def subject = "[nf-core/chipseq] Successful: $workflow.runName" - if(!workflow.success){ + if (!workflow.success) { subject = "[nf-core/chipseq] FAILED: $workflow.runName" } def email_fields = [:] @@ -1534,20 +1536,21 @@ workflow.onComplete { email_fields['summary']['Date Completed'] = workflow.complete email_fields['summary']['Pipeline script file path'] = workflow.scriptFile email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if(workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if(workflow.commitId) 
email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if(workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - if(workflow.container) email_fields['summary']['Docker image'] = workflow.container + if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository + if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId + if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision + if (workflow.container) email_fields['summary']['Docker image'] = workflow.container email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) // On success try attach the multiqc report def mqc_report = null try { if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList){ + mqc_report = multiqc_report.getVal() + if (mqc_report.getClass() == ArrayList) { log.warn "[nf-core/chipseq] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] } @@ -1556,6 +1559,12 @@ workflow.onComplete { log.warn "[nf-core/chipseq] Could not attach MultiQC report to summary email" } + // Check if we are only sending emails on failure + email_address = params.email + if (!params.email && params.email_on_fail && !workflow.success) { + email_address = params.email_on_fail + } + // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() def tf = new File("$baseDir/assets/email_template.txt") @@ -1568,29 +1577,29 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: params.email, subject: 
subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - if (params.email) { + if (email_address) { try { - if( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } + if ( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/chipseq] Sent summary e-mail to $params.email (sendmail)" + log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)" } catch (all) { // Catch failures and try with plaintext - [ 'mail', '-s', subject, params.email ].execute() << email_txt - log.info "[nf-core/chipseq] Sent summary e-mail to $params.email (mail)" + [ 'mail', '-s', subject, email_address ].execute() << email_txt + log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (mail)" } } // Write summary e-mail HTML to a file def output_d = new File( "${params.outdir}/pipeline_info/" ) - if( !output_d.exists() ) { - output_d.mkdirs() + if (!output_d.exists()) { + output_d.mkdirs() } def output_hf = new File( output_d, "pipeline_report.html" ) output_hf.withWriter { w -> w << email_html } @@ -1608,7 +1617,7 @@ workflow.onComplete { log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" } - if(workflow.success){ + if (workflow.success) { log.info "${c_purple}[nf-core/chipseq]${c_green} Pipeline completed successfully${c_reset}" } else { 
checkHostname() @@ -1637,14 +1646,14 @@ def nfcoreHeader(){ c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; c_white = params.monochrome_logs ? '' : "\033[0;37m"; - return """ ${c_dim}----------------------------------------------------${c_reset} + return """ -${c_dim}--------------------------------------------------${c_reset}- ${c_green},--.${c_black}/${c_green},-.${c_reset} ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} ${c_green}`._,._,\'${c_reset} ${c_purple} nf-core/chipseq v${workflow.manifest.version}${c_reset} - ${c_dim}----------------------------------------------------${c_reset} + -${c_dim}--------------------------------------------------${c_reset}- """.stripIndent() } @@ -1653,11 +1662,11 @@ def checkHostname(){ def c_white = params.monochrome_logs ? '' : "\033[0;37m" def c_red = params.monochrome_logs ? '' : "\033[1;91m" def c_yellow_bold = params.monochrome_logs ? 
'' : "\033[1;93m" - if(params.hostnames){ + if (params.hostnames) { def hostname = "hostname".execute().text.trim() params.hostnames.each { prof, hnames -> hnames.each { hname -> - if(hostname.contains(hname) && !workflow.profile.contains(prof)){ + if (hostname.contains(hname) && !workflow.profile.contains(prof)) { log.error "====================================================\n" + " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + From 96a0f0edf4ee0d34abdc3498d6498ae7cab1ca31 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 10 Oct 2019 17:05:11 +0100 Subject: [PATCH 036/113] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a39267ed..7aae65a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config * Capitalised process names * Add quick start information to main README +* Update template to tools v1.7 ### `Fixed` From 5af432c178265988601925cd15f42585a7f8d4e1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 10:20:33 +0100 Subject: [PATCH 037/113] Bump nextflow version --- .travis.yml | 2 +- README.md | 2 +- nextflow.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6b1fd10c..ae8b1235 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ install: - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER='0.32.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='19.04.0' # Specify a minimum NF version that should be tested and work - NXF_VER='' # Plus: get the latest NF version and check that it works script: diff --git a/README.md b/README.md index 4c7b78f4..2685a359 100644 --- a/README.md +++ 
b/README.md @@ -1,7 +1,7 @@ # ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) [![Build Status](https://travis-ci.org/nf-core/chipseq.svg?branch=master)](https://travis-ci.org/nf-core/chipseq) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A50.32.0-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) diff --git a/nextflow.config b/nextflow.config index 0d9c60a6..25d9bf58 100644 --- a/nextflow.config +++ b/nextflow.config @@ -145,7 +145,7 @@ manifest { homePage = 'https://github.com/nf-core/chipseq' description = 'ChIP-seq peak-calling and differential analysis pipeline.' mainScript = 'main.nf' - nextflowVersion = '>=0.32.0' + nextflowVersion = '>=19.04.0' version = '1.0.1dev' } From 0d17250fa2157af307c2130c66a0407da71f72f1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 10:26:27 +0100 Subject: [PATCH 038/113] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7aae65a1..ff3579d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config * Capitalised process names * Add quick start information to main README -* Update template to tools v1.7 +* Update template to tools `1.7` +* Bump Nextflow version to `19.04.0` ### `Fixed` From 685fe17a980cbe5f67a66994e1bffd8ca889f9e2 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 13:03:20 +0100 Subject: [PATCH 039/113] Update blacklist staging --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 29563b65..2d80880f 100755 --- a/main.nf +++ b/main.nf @@ -160,7 +160,7 @@ if (params.design) { ch_design = file(params.design, checkIfExists: true) } e if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "GTF annotation file not specified!" } if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } -if (params.blacklist) { ch_blacklist = file(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } +if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } if (params.fasta){ lastPath = params.fasta.lastIndexOf(File.separator) From cbf103c7e47812df58170126fb532fd488959cd1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 14:05:08 +0100 Subject: [PATCH 040/113] Update params --- assets/multiqc/peak_annotation_header.txt | 3 --- assets/multiqc/peak_annotation_header_cp.txt | 12 ++++++++++++ main.nf | 16 +++++++++------- 3 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 assets/multiqc/peak_annotation_header_cp.txt diff --git a/assets/multiqc/peak_annotation_header.txt b/assets/multiqc/peak_annotation_header.txt index 14f89548..e67a41c1 100644 --- a/assets/multiqc/peak_annotation_header.txt +++ b/assets/multiqc/peak_annotation_header.txt @@ -7,6 +7,3 @@ 
#pconfig: # title: 'Peak to feature %' # ylab: 'Feature %' -# ymax: 100 -# ymin: 0 -# cpswitch_c_active: false diff --git a/assets/multiqc/peak_annotation_header_cp.txt b/assets/multiqc/peak_annotation_header_cp.txt new file mode 100644 index 00000000..14f89548 --- /dev/null +++ b/assets/multiqc/peak_annotation_header_cp.txt @@ -0,0 +1,12 @@ +#id: 'peak_annotation' +#section_name: 'HOMER: Peak annotation' +#description: "is generated by calculating the proportion of peaks assigned to genomic features by +# HOMER annotatePeaks.pl." +#plot_type: 'bargraph' +#anchor: 'nfcore_chipseq-peak_annotation' +#pconfig: +# title: 'Peak to feature %' +# ylab: 'Feature %' +# ymax: 100 +# ymin: 0 +# cpswitch_c_active: false diff --git a/main.nf b/main.nf index 2d80880f..a082fa47 100755 --- a/main.nf +++ b/main.nf @@ -463,14 +463,14 @@ process FastQC { if (params.singleEnd) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - fastqc -q ${name}.fastq.gz + fastqc -q -t $task.cpus ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! 
-f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - fastqc -q ${name}_1.fastq.gz - fastqc -q ${name}_2.fastq.gz + fastqc -q -t $task.cpus ${name}_1.fastq.gz + fastqc -q -t $task.cpus ${name}_2.fastq.gz """ } } @@ -945,7 +945,7 @@ process PlotProfile { --afterRegionStartLength 3000 \\ --skipZeros \\ --smartLabels \\ - -p $task.cpus + --numberOfProcessors $task.cpus plotProfile --matrixFile ${name}.computeMatrix.mat.gz \\ --outFileName ${name}.plotProfile.pdf \\ @@ -1037,8 +1037,8 @@ process PlotFingerprint { --outQualityMetrics ${ip}.plotFingerprint.qcmetrics.txt \\ --skipZeros \\ --JSDsample ${controlbam[0]} \\ - --numberOfProcessors ${task.cpus} \\ - --numberOfSamples ${params.fingerprint_bins} + --numberOfProcessors $task.cpus \\ + --numberOfSamples $params.fingerprint_bins """ } @@ -1080,7 +1080,7 @@ process MACSCallPeak { -c ${controlbam[0]} \\ $broad \\ -f $format \\ - -g ${params.macs_gsize} \\ + -g $params.macs_gsize \\ -n $ip \\ $pileup \\ --keep-dup all @@ -1120,6 +1120,7 @@ process AnnotatePeaks { $fasta \\ -gid \\ -gtf $gtf \\ + -cpu $task.cpus \\ > ${ip}_peaks.annotatePeaks.txt """ } @@ -1256,6 +1257,7 @@ process ConsensusPeakSetAnnotate { $fasta \\ -gid \\ -gtf $gtf \\ + -cpu $task.cpus \\ > ${prefix}.annotatePeaks.txt cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -k1,1 -k2,2n"}' | cut -f6- > tmp.txt From a1c3e69d615d90cad0ef6129aee5d28e178a3e65 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 14:16:16 +0100 Subject: [PATCH 041/113] Missed 2M in header log info --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index a082fa47..c364a00d 100755 --- a/main.nf +++ b/main.nf @@ -266,7 +266,7 @@ if (params.email || params.email_on_fail) { summary['MultiQC Max Size'] = params.maxMultiqcEmailFileSize } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") -log.info "\033[2m----------------------------------------------------\033[0m" 
+log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles checkHostname() From 34f07fa8a57d50628709e7b362660b3f1a799150 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 14:22:32 +0100 Subject: [PATCH 042/113] Fix MultiQC HOMER peak to feature plot --- assets/multiqc/peak_annotation_header.txt | 4 ++-- assets/multiqc/peak_annotation_header_cp.txt | 12 ------------ 2 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 assets/multiqc/peak_annotation_header_cp.txt diff --git a/assets/multiqc/peak_annotation_header.txt b/assets/multiqc/peak_annotation_header.txt index e67a41c1..1aff7522 100644 --- a/assets/multiqc/peak_annotation_header.txt +++ b/assets/multiqc/peak_annotation_header.txt @@ -5,5 +5,5 @@ #plot_type: 'bargraph' #anchor: 'nfcore_chipseq-peak_annotation' #pconfig: -# title: 'Peak to feature %' -# ylab: 'Feature %' +# title: 'Peak to feature proportion' +# ylab: 'Peak count' diff --git a/assets/multiqc/peak_annotation_header_cp.txt b/assets/multiqc/peak_annotation_header_cp.txt deleted file mode 100644 index 14f89548..00000000 --- a/assets/multiqc/peak_annotation_header_cp.txt +++ /dev/null @@ -1,12 +0,0 @@ -#id: 'peak_annotation' -#section_name: 'HOMER: Peak annotation' -#description: "is generated by calculating the proportion of peaks assigned to genomic features by -# HOMER annotatePeaks.pl." 
-#plot_type: 'bargraph' -#anchor: 'nfcore_chipseq-peak_annotation' -#pconfig: -# title: 'Peak to feature %' -# ylab: 'Feature %' -# ymax: 100 -# ymin: 0 -# cpswitch_c_active: false From cdcb1fb6297307f16f177f2f02b9a6473647a9b9 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 15:21:35 +0100 Subject: [PATCH 043/113] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff3579d6..b050eec2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,8 +17,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` -* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 +* [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? +* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures From bc3405970e1e91200eeacd121dd7412189683fb7 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 15:52:27 +0100 Subject: [PATCH 044/113] Fix levels in R scripts --- bin/plot_homer_annotatepeaks.r | 6 +++--- bin/plot_macs_qc.r | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index ee537fce..9865357c 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -87,9 +87,9 @@ for (idx in 1:length(HomerFiles)) { plot.dist.dat <- rbind(plot.dist.dat,dist.melt) } -levels(plot.dat$name) <- sort(unique(as.character(plot.dat$name))) -levels(plot.dist.dat$variable) <- sort(unique(as.character(plot.dist.dat$variable))) -levels(plot.feature.dat$variable) <- 
sort(unique(as.character(plot.feature.dat$variable))) +plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) +plot.dist.dat$variable <- factor(plot.dist.dat$variable, levels=sort(unique(as.character(plot.dist.dat$variable)))) +plot.feature.dat$variable <- factor(plot.feature.dat$variable, levels=sort(unique(as.character(plot.feature.dat$variable)))) summary.dat <- dcast(plot.feature.dat, variable ~ feature, value.var="value") colnames(summary.dat)[1] <- "sample" diff --git a/bin/plot_macs_qc.r b/bin/plot_macs_qc.r index e43fcb8c..2360505f 100755 --- a/bin/plot_macs_qc.r +++ b/bin/plot_macs_qc.r @@ -81,7 +81,7 @@ for (idx in 1:length(PeakFiles)) { peaks.dat$name <- rep(sampleid,nrow(peaks.dat)) plot.dat <- rbind(plot.dat,peaks.dat) } -levels(plot.dat$name) <- sort(unique(as.character(plot.dat$name))) +plot.dat$name <- factor(plot.dat$name, levels=sort(unique(as.character(plot.dat$name)))) SummaryFile <- file.path(opt$outdir,paste(opt$outprefix,".summary.txt",sep="")) write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,col.names=TRUE) From 8cde41cc5e8703c3326d73cfe2971543ce6e144a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 15:55:36 +0100 Subject: [PATCH 045/113] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b050eec2..a9fbd87a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 * [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? 
* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly +* [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures From da8dd9a7f5e0f31a1a2ca462df2c7b0c6382131b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 11 Oct 2019 16:30:22 +0100 Subject: [PATCH 046/113] Fix MultiQC channel --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index c364a00d..a7751c20 100755 --- a/main.nf +++ b/main.nf @@ -1551,7 +1551,7 @@ workflow.onComplete { def mqc_report = null try { if (workflow.success) { - mqc_report = multiqc_report.getVal() + mqc_report = ch_multiqc_report.getVal() if (mqc_report.getClass() == ArrayList) { log.warn "[nf-core/chipseq] Found multiple reports from process 'multiqc', will use only one" mqc_report = mqc_report[0] From 839078f0452cfff2129fc9b658f401508d4de9e4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 13:17:17 +0100 Subject: [PATCH 047/113] Change maxMultiqcEmailFileSize to max_multiqc_email_size --- docs/usage.md | 4 ++++ main.nf | 8 ++++---- nextflow.config | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 8e2a3103..9c3e5f09 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -52,6 +52,7 @@ * [`--outdir`](#--outdir) * [`--email`](#--email) * [`--email_on_fail`](#--email_on_fail) + * [`--max_multiqc_email_size`](#--max_multiqc_email_size) * [`-name`](#-name) * [`-resume`](#-resume) * [`-c`](#-c) @@ -423,6 +424,9 @@ Set this parameter to your e-mail address to get a summary e-mail with details o ### `--email_on_fail` This works exactly as with `--email`, except emails are only sent if the workflow is not successful. 
+### `--max_multiqc_email_size` +Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB). + ### `-name` Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. diff --git a/main.nf b/main.nf index a7751c20..2a266dde 100755 --- a/main.nf +++ b/main.nf @@ -75,7 +75,7 @@ def helpMessage() { --outdir The output directory where the results will be saved --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits --email_on_fail Same as --email, except only send mail if the workflow is not successful - --maxMultiqcEmailFileSize Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + --max_multiqc_email_size Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) -name Name for the pipeline run. 
If not specified, Nextflow will automatically generate a random mnemonic AWSBatch @@ -263,7 +263,7 @@ if (params.config_profile_url) summary['Config URL'] = params.co if (params.email || params.email_on_fail) { summary['E-mail Address'] = params.email summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC Max Size'] = params.maxMultiqcEmailFileSize + summary['MultiQC Max Size'] = params.max_multiqc_email_size } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" @@ -1546,7 +1546,7 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.maxMultiqcEmailFileSize) + // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size) // On success try attach the multiqc report def mqc_report = null try { @@ -1579,7 +1579,7 @@ workflow.onComplete { def email_html = html_template.toString() // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.maxMultiqcEmailFileSize.toBytes() ] + def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] def sf = new File("$baseDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) def sendmail_html = sendmail_template.toString() diff --git a/nextflow.config b/nextflow.config index 25d9bf58..48631867 100644 --- a/nextflow.config +++ b/nextflow.config @@ -71,7 +71,7 @@ params { outdir = './results' igenomes_base = "./iGenomes" igenomesIgnore = false - 
maxMultiqcEmailFileSize = 25.MB + max_multiqc_email_size = 25.MB tracedir = "${params.outdir}/pipeline_info" email = false email_on_fail = false From 3c9a729b28301d6fba487cc91386c9bc11a6043b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 13:26:26 +0100 Subject: [PATCH 048/113] Change saveGenomeIndex to saveReference --- docs/output.md | 4 ++-- docs/usage.md | 6 +++--- main.nf | 8 ++++---- nextflow.config | 2 +- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/output.md b/docs/output.md index 5ad6f35c..4c48e046 100644 --- a/docs/output.md +++ b/docs/output.md @@ -53,7 +53,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam [BWA](http://bio-bwa.sourceforge.net/bwa.shtml), [SAMtools](http://samtools.sourceforge.net/) *Description*: - Adapter-trimmed reads are mapped to the reference assembly using BWA. A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--saveGenomeIndex` parameter to save the indices for future pipeline runs, reducing processing times. + Adapter-trimmed reads are mapped to the reference assembly using BWA. A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--saveReference` parameter to save the indices for future pipeline runs, reducing processing times. 
![MultiQC - SAMtools stats plot](images/mqc_samtools_stats_plot.png) @@ -305,7 +305,7 @@ The library-level alignments associated with the same sample are merged and subs * `reference_genome/` A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. * `reference_genome/BWAIndex/` - If the `--saveGenomeIndex` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. + If the `--saveReference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. 2. **Pipeline information** diff --git a/docs/usage.md b/docs/usage.md index 9c3e5f09..e9df71a9 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -26,7 +26,7 @@ * [`--tss_bed`](#--tss_bed) * [`--macs_gsize`](#--macs_gsize) * [`--blacklist`](#--blacklist) - * [`--saveGenomeIndex`](#--savegenomeindex) + * [`--saveReference`](#--saveReference) * [`--igenomesIgnore`](#--igenomesignore) * [Adapter trimming](#adapter-trimming) * [`--skipTrimming`](#--skiptrimming) @@ -270,7 +270,7 @@ params { ``` ### `--fasta` -Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--saveGenomeIndex` to save BWA index for future runs. +Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. 
Combine with `--saveReference` to save BWA index for future runs. ```bash --fasta '[path to FASTA reference]' @@ -318,7 +318,7 @@ If provided, alignments that overlap with the regions in this file will be filte --blacklist '[path to blacklisted regions]' ``` -### `--saveGenomeIndex` +### `--saveReference` If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times. ### `--igenomesIgnore` diff --git a/main.nf b/main.nf index 2a266dde..45cce894 100755 --- a/main.nf +++ b/main.nf @@ -38,7 +38,7 @@ def helpMessage() { --tss_bed Path to BED file containing transcription start sites --macs_gsize Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 --blacklist Path to blacklist regions (.BED format), used for filtering alignments - --saveGenomeIndex If generated by the pipeline save the BWA index in the results directory + --saveReference If generated by the pipeline save the BWA index in the results directory Trimming --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) @@ -231,7 +231,7 @@ if (params.singleEnd) summary['Fragment Size'] = "$params.fragment_siz summary['Fingerprint Bins'] = params.fingerprint_bins if (params.keepDups) summary['Keep Duplicates'] = 'Yes' if (params.keepMultiMap) summary['Keep Multi-mapped'] = 'Yes' -summary['Save Genome Index'] = params.saveGenomeIndex ? 'Yes' : 'No' +summary['Save Genome Index'] = params.saveReference ? 
'Yes' : 'No' if (params.saveTrimmed) summary['Save Trimmed'] = 'Yes' if (params.saveAlignedIntermediates) summary['Save Intermeds'] = 'Yes' if (params.saveMACSPileup) summary['Save MACS2 Pileup'] = 'Yes' @@ -346,8 +346,8 @@ if (!params.bwa_index){ process BWAIndex { tag "$fasta" label 'process_high' - publishDir path: { params.saveGenomeIndex ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveGenomeIndex ? it : null }, mode: 'copy' + publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.saveReference ? it : null }, mode: 'copy' input: file fasta from ch_fasta diff --git a/nextflow.config b/nextflow.config index 48631867..2750dd86 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,7 +17,7 @@ params { // Options: References genome = false tss_bed = false - saveGenomeIndex = false + saveReference = false // Options: Trimming clip_r1 = 0 From f41b8fe0b9b1eb374b9453cb7157c4318ea72885 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 13:28:13 +0100 Subject: [PATCH 049/113] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9fbd87a..3793bebe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures +* Change parameter `saveGenomeIndex` to `saveReference` ### `Dependencies` From f48b6000dbced4305bd407d20091442721c657bd Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 14:29:31 +0100 Subject: [PATCH 050/113] Unify with chipseq --- main.nf | 156 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 79 insertions(+), 77 deletions(-) diff --git a/main.nf b/main.nf index 45cce894..5304559d 100755 --- a/main.nf +++ b/main.nf @@ -97,7 +97,7 @@ def helpMessage() { */ // Show help message -if (params.help){ +if (params.help) { helpMessage() exit 0 } @@ -122,8 +122,8 @@ params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)){ - custom_runName = workflow.runName +if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { + custom_runName = workflow.runName } //////////////////////////////////////////////////// @@ -162,7 +162,7 @@ if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } -if (params.fasta){ +if (params.fasta) { lastPath = params.fasta.lastIndexOf(File.separator) bwa_base = params.fasta.substring(lastPath+1) ch_fasta = file(params.fasta, checkIfExists: true) @@ -170,27 +170,26 @@ if (params.fasta){ exit 1, "Fasta file not specified!" 
} -if (params.bwa_index){ +if (params.bwa_index) { lastPath = params.bwa_index.lastIndexOf(File.separator) bwa_dir = params.bwa_index.substring(0,lastPath+1) bwa_base = params.bwa_index.substring(lastPath+1) ch_bwa_index = Channel .fromPath(bwa_dir, checkIfExists: true) - .ifEmpty { exit 1, "BWA index directory not found: ${bwa_dir}" } } //////////////////////////////////////////////////// /* -- AWS -- */ //////////////////////////////////////////////////// -if ( workflow.profile == 'awsbatch') { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." +if (workflow.profile == 'awsbatch') { + // AWSBatch sanity checking + if (!params.awsqueue || !params.awsregion) exit 1, "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" + // Check outdir paths to be S3 buckets if running on AWSBatch + // related: https://github.com/nextflow-io/nextflow/issues/813 + if (!params.outdir.startsWith('s3:')) exit 1, "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" + // Prevent trace files to be stored on S3 since S3 does not support rolling files. + if (workflow.tracedir.startsWith('s3:')) exit 1, "Specify a local tracedir or run without trace! S3 cannot be used for tracefiles." 
} /////////////////////////////////////////////////////////////////////////////// @@ -218,7 +217,7 @@ summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' summary['Min Consensus Reps'] = params.min_reps_consensus if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' if (!params.narrowPeak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff -if (params.skipTrimming){ +if (params.skipTrimming) { summary['Trimming Step'] = 'Skipped' } else { summary['Trim R1'] = "$params.clip_r1 bp" @@ -252,7 +251,7 @@ summary['Launch Dir'] = workflow.launchDir summary['Working Dir'] = workflow.workDir summary['Script Dir'] = workflow.projectDir summary['User'] = workflow.userName -if (workflow.profile == 'awsbatch'){ +if (workflow.profile == 'awsbatch') { summary['AWS Region'] = params.awsregion summary['AWS Queue'] = params.awsqueue } @@ -272,7 +271,7 @@ log.info "-\033[2m--------------------------------------------------\033[0m-" checkHostname() // Show a big warning message if we're not running MACS -if (!params.macs_gsize){ +if (!params.macs_gsize) { def warnstring = params.genome ? "supported for '${params.genome}'" : 'supplied' log.warn "=================================================================\n" + " WARNING! MACS genome size parameter not $warnstring.\n" + @@ -342,7 +341,7 @@ if (params.singleEnd) { /* * PREPROCESSING - Build BWA index */ -if (!params.bwa_index){ +if (!params.bwa_index) { process BWAIndex { tag "$fasta" label 'process_high' @@ -366,7 +365,7 @@ if (!params.bwa_index){ /* * PREPROCESSING - Generate gene BED file */ -if (!params.gene_bed){ +if (!params.gene_bed) { process MakeGeneBED { tag "$gtf" label 'process_low' @@ -388,7 +387,7 @@ if (!params.gene_bed){ /* * PREPROCESSING - Generate TSS BED file */ -if (!params.tss_bed){ +if (!params.tss_bed) { process MakeTSSBED { tag "$bed" publishDir "${params.outdir}/reference_genome", mode: 'copy' @@ -486,7 +485,7 @@ process FastQC { /* * STEP 2 - Trim Galore! 
*/ -if (params.skipTrimming){ +if (params.skipTrimming) { ch_trimmed_reads = ch_raw_reads_trimgalore ch_trimgalore_results_mqc = [] ch_trimgalore_fastqc_reports_mqc = [] @@ -542,7 +541,7 @@ if (params.skipTrimming){ /* * STEP 3.1 - Align read 1 with bwa */ -process BWAmem { +process BWAMem { tag "$name" label 'process_high' @@ -554,10 +553,10 @@ process BWAmem { set val(name), file("*.bam") into ch_bwa_bam script: - prefix="${name}.Lb" - rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" + prefix = "${name}.Lb" + rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" if (params.seq_center) { - rg="\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" + rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" } """ bwa mem \\ @@ -593,7 +592,7 @@ process SortBAM { file "*.{flagstat,idxstats,stats}" into ch_sort_bam_flagstat_mqc script: - prefix="${name}.Lb" + prefix = "${name}.Lb" """ samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam samtools index ${prefix}.sorted.bam @@ -612,7 +611,7 @@ process SortBAM { /////////////////////////////////////////////////////////////////////////////// /* - * STEP 4.1 Merge BAM files for all libraries from same sample + * STEP 4.1 Merge BAM files for all libraries from same replicate */ ch_sort_bam_merge.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } .groupTuple(by: [0]) @@ -641,10 +640,10 @@ process MergeBAM { file "*.txt" into ch_merge_bam_metrics_mqc script: - prefix="${name}.mLb.mkD" + prefix = "${name}.mLb.mkD" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() def avail_mem = 3 - if (!task.memory){ + if (!task.memory) { log.info "[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. 
Specify process memory requirements to change this." } else { avail_mem = task.memory.toGiga() @@ -749,7 +748,7 @@ process MergeBAMFilter { /* * STEP 4.3 Remove orphan reads from paired-end BAM file */ -if (params.singleEnd){ +if (params.singleEnd) { ch_filter_bam.into { ch_rm_orphan_bam_metrics; ch_rm_orphan_bam_bigwig; ch_rm_orphan_bam_macs_1; @@ -790,7 +789,7 @@ if (params.singleEnd){ file "*.{idxstats,stats}" into ch_rm_orphan_stats_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" """ bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs @@ -829,7 +828,7 @@ process Preseq { file "*.ccurve.txt" into ch_preseq_results script: - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" """ preseq lc_extrap -v -output ${prefix}.ccurve.txt -bam ${bam[0]} """ @@ -860,9 +859,9 @@ process CollectMultipleMetrics { file "*.pdf" into ch_collectmetrics_pdf script: - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" def avail_mem = 3 - if (!task.memory){ + if (!task.memory) { log.info "[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this." } else { avail_mem = task.memory.toGiga() @@ -885,7 +884,7 @@ process BigWig { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: 'copy', saveAs: {filename -> - if (filename.endsWith(".txt")) "scale/$filename" + if (filename.endsWith("scale_factor.txt")) "scale/$filename" else if (filename.endsWith(".bigWig")) "$filename" else null } @@ -900,7 +899,7 @@ process BigWig { file "*igv.txt" into ch_bigwig_igv script: - prefix="${name}.mLb.clN" + prefix = "${name}.mLb.clN" pe_fragment = params.singleEnd ? "" : "-pc" extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ @@ -1116,7 +1115,8 @@ process AnnotatePeaks { script: peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" """ - annotatePeaks.pl $peak \\ + annotatePeaks.pl \\ + $peak \\ $fasta \\ -gid \\ -gtf $gtf \\ @@ -1129,38 +1129,38 @@ process AnnotatePeaks { * STEP 6.4 Aggregated QC plots for peaks, FRiP and peak-to-gene annotation */ process PeakQC { - label "process_medium" - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/qc", mode: 'copy' - - when: - params.macs_gsize - - input: - file peaks from ch_macs_qc.collect{ it[-1] } - file annos from ch_macs_annotate.collect() - file peak_annotation_header from ch_peak_annotation_header - - output: - file "*.{txt,pdf}" into ch_macs_qc_output - file "*.tsv" into ch_macs_qc_mqc - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - """ - plot_macs_qc.r \\ - -i ${peaks.join(',')} \\ - -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ - -o ./ \\ - -p macs_peak - - plot_homer_annotatepeaks.r \\ - -i ${annos.join(',')} \\ - -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ - -o ./ \\ - -p macs_annotatePeaks - - cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv - """ + label "process_medium" + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/qc", mode: 'copy' + + when: + params.macs_gsize + + input: + file peaks from ch_macs_qc.collect{ it[-1] } + file annos from ch_macs_annotate.collect() + file peak_annotation_header from ch_peak_annotation_header + + output: + file "*.{txt,pdf}" into ch_macs_qc_output + file "*.tsv" into ch_macs_qc_mqc + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + peaktype = params.narrowPeak ? 
"narrowPeak" : "broadPeak" + """ + plot_macs_qc.r \\ + -i ${peaks.join(',')} \\ + -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ + -o ./ \\ + -p macs_peak + + plot_homer_annotatepeaks.r \\ + -i ${annos.join(',')} \\ + -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ + -o ./ \\ + -p macs_annotatePeaks + + cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv + """ } /////////////////////////////////////////////////////////////////////////////// @@ -1203,7 +1203,7 @@ process ConsensusPeakSet { file "*igv.txt" into ch_macs_consensus_igv script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - prefix="${antibody}.consensus_peaks" + prefix = "${antibody}.consensus_peaks" peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') @@ -1250,10 +1250,11 @@ process ConsensusPeakSetAnnotate { file "*.annotatePeaks.txt" into ch_macs_consensus_annotate script: - prefix="${antibody}.consensus_peaks" + prefix = "${antibody}.consensus_peaks" peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" """ - annotatePeaks.pl $bed \\ + annotatePeaks.pl \\ + $bed \\ $fasta \\ -gid \\ -gtf $gtf \\ @@ -1305,13 +1306,14 @@ process ConsensusPeakSetDESeq { file "*.tsv" into ch_macs_consensus_deseq_mqc script: - prefix="${antibody}.consensus_peaks" + prefix = "${antibody}.consensus_peaks" peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" pe_params = params.singleEnd ? 
'' : "-p --donotsort" """ - featureCounts -F SAF \\ + featureCounts \\ + -F SAF \\ -O \\ --fracOverlap 0.2 \\ -T $task.cpus \\ @@ -1587,7 +1589,7 @@ workflow.onComplete { // Send the HTML e-mail if (email_address) { try { - if ( params.plaintext_email ){ throw GroovyException('Send plaintext e-mail, not HTML') } + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } // Try to send HTML e-mail using sendmail [ 'sendmail', '-t' ].execute() << sendmail_html log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)" @@ -1636,7 +1638,7 @@ workflow.onComplete { /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// -def nfcoreHeader(){ +def nfcoreHeader() { // Log colors ANSI codes c_reset = params.monochrome_logs ? '' : "\033[0m"; c_dim = params.monochrome_logs ? '' : "\033[2m"; @@ -1659,7 +1661,7 @@ def nfcoreHeader(){ """.stripIndent() } -def checkHostname(){ +def checkHostname() { def c_reset = params.monochrome_logs ? '' : "\033[0m" def c_white = params.monochrome_logs ? '' : "\033[0;37m" def c_red = params.monochrome_logs ? '' : "\033[1;91m" From 7015763cf3080e076564be54d4110e1c3d3252e0 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 14:52:27 +0100 Subject: [PATCH 051/113] Adjust spacing --- main.nf | 118 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/main.nf b/main.nf index 5304559d..6cf47045 100755 --- a/main.nf +++ b/main.nf @@ -446,7 +446,9 @@ process FastQC { tag "$name" label 'process_medium' publishDir "${params.outdir}/fastqc", mode: 'copy', - saveAs: {filename -> filename.endsWith(".zip") ? "zips/$filename" : "$filename"} + saveAs: { filename -> + filename.endsWith(".zip") ? 
"zips/$filename" : "$filename" + } when: !params.skipFastQC @@ -494,12 +496,12 @@ if (params.skipTrimming) { tag "$name" label 'process_long' publishDir "${params.outdir}/trim_galore", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".html")) "fastqc/$filename" - else if (filename.endsWith(".zip")) "fastqc/zips/$filename" - else if (filename.endsWith("trimming_report.txt")) "logs/$filename" - else params.saveTrimmed ? filename : null - } + saveAs: { filename -> + if (filename.endsWith(".html")) "fastqc/$filename" + else if (filename.endsWith(".zip")) "fastqc/zips/$filename" + else if (filename.endsWith("trimming_report.txt")) "logs/$filename" + else params.saveTrimmed ? filename : null + } input: set val(name), file(reads) from ch_raw_reads_trimgalore @@ -578,11 +580,12 @@ process SortBAM { if (params.saveAlignedIntermediates) { publishDir path: "${params.outdir}/bwa/library", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else filename } - } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else filename + } + } input: set val(name), file(bam) from ch_bwa_bam @@ -623,12 +626,12 @@ process MergeBAM { label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".metrics.txt")) "picard_metrics/$filename" - else params.saveAlignedIntermediates ? 
filename : null - } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".metrics.txt")) "picard_metrics/$filename" + else params.saveAlignedIntermediates ? filename : null + } input: set val(name), file(bams) from ch_sort_bam_merge @@ -699,14 +702,15 @@ process MergeBAMFilter { label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (params.singleEnd || params.saveAlignedIntermediates) { - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".sorted.bam")) filename - else if (filename.endsWith(".sorted.bam.bai")) filename - else null } - } + if (params.singleEnd || params.saveAlignedIntermediates) { + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".sorted.bam")) filename + else if (filename.endsWith(".sorted.bam.bai")) filename + else null + } + } input: set val(name), file(bam) from ch_merge_bam_filter @@ -765,13 +769,13 @@ if (params.singleEnd) { label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (filename.endsWith(".flagstat")) "samtools_stats/$filename" - else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" - else if (filename.endsWith(".stats")) "samtools_stats/$filename" - else if (filename.endsWith(".sorted.bam")) filename - else if (filename.endsWith(".sorted.bam.bai")) filename - else null - } + if (filename.endsWith(".flagstat")) "samtools_stats/$filename" + else if 
(filename.endsWith(".idxstats")) "samtools_stats/$filename" + else if (filename.endsWith(".stats")) "samtools_stats/$filename" + else if (filename.endsWith(".sorted.bam")) filename + else if (filename.endsWith(".sorted.bam.bai")) filename + else null + } input: set val(name), file(bam) from ch_filter_bam @@ -825,7 +829,7 @@ process Preseq { set val(name), file(bam) from ch_merge_bam_preseq output: - file "*.ccurve.txt" into ch_preseq_results + file "*.ccurve.txt" into ch_preseq_mqc script: prefix = "${name}.mLb.clN" @@ -842,10 +846,10 @@ process CollectMultipleMetrics { label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (filename.endsWith("_metrics")) "picard_metrics/$filename" - else if (filename.endsWith(".pdf")) "picard_metrics/pdf/$filename" - else null - } + if (filename.endsWith("_metrics")) "picard_metrics/$filename" + else if (filename.endsWith(".pdf")) "picard_metrics/pdf/$filename" + else null + } when: !params.skipPicardMetrics @@ -883,10 +887,10 @@ process BigWig { tag "$name" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith("scale_factor.txt")) "scale/$filename" - else if (filename.endsWith(".bigWig")) "$filename" - else null + saveAs: { filename -> + if (filename.endsWith("scale_factor.txt")) "scale/$filename" + else if (filename.endsWith(".bigWig")) "$filename" + else null } input: @@ -1048,10 +1052,10 @@ process MACSCallPeak { tag "${ip} vs ${control}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".tsv")) "qc/$filename" - else if (filename.endsWith(".igv.txt")) null - else filename + saveAs: { filename -> + if (filename.endsWith(".tsv")) "qc/$filename" + else if (filename.endsWith(".igv.txt")) null + else filename } when: @@ -1184,9 +1188,9 @@ process ConsensusPeakSet { tag "${antibody}" 
label 'process_long' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".igv.txt")) null - else filename + saveAs: { filename -> + if (filename.endsWith(".igv.txt")) null + else filename } when: @@ -1282,9 +1286,9 @@ process ConsensusPeakSetDESeq { tag "${antibody}" label 'process_medium' publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2", mode: 'copy', - saveAs: {filename -> - if (filename.endsWith(".igv.txt")) null - else filename + saveAs: { filename -> + if (filename.endsWith(".igv.txt")) null + else filename } when: @@ -1384,10 +1388,10 @@ process IGV { */ process get_software_versions { publishDir "${params.outdir}/pipeline_info", mode: 'copy', - saveAs: {filename -> - if (filename.indexOf(".csv") > 0) filename - else null - } + saveAs: { filename -> + if (filename.indexOf(".csv") > 0) filename + else null + } output: file 'software_versions_mqc.yaml' into ch_software_versions_mqc @@ -1465,7 +1469,7 @@ process MultiQC { file ('macs/consensus/*') from ch_macs_consensus_counts_mqc.collect().ifEmpty([]) file ('macs/consensus/*') from ch_macs_consensus_deseq_mqc.collect().ifEmpty([]) - file ('preseq/*') from ch_preseq_results.collect().ifEmpty([]) + file ('preseq/*') from ch_preseq_mqc.collect().ifEmpty([]) file ('deeptools/*') from ch_plotfingerprint_mqc.collect().ifEmpty([]) file ('deeptools/*') from ch_plotprofile_mqc.collect().ifEmpty([]) file ('phantompeakqualtools/*') from ch_spp_out_mqc.collect().ifEmpty([]) From 8f01b679501fee4e732087e2c6abc72828cc252c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 15:07:54 +0100 Subject: [PATCH 052/113] Fix description --- bin/check_design.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/check_design.py b/bin/check_design.py index 751932b0..9c263b8f 100755 --- a/bin/check_design.py +++ b/bin/check_design.py @@ -2,7 +2,7 @@ 
####################################################################### ####################################################################### -## Created on April 4th 2019 to reformat nf-core/chipseq design file +## Created on April 4th 2019 to check nf-core/chipseq design file ####################################################################### ####################################################################### @@ -18,7 +18,7 @@ ############################################ Description = 'Reformat nf-core/chipseq design file and check its contents.' -Epilog = """Example usage: python reformat_design.py """ +Epilog = """Example usage: python check_design.py """ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) From a5be4d4be848b6318d425eff6e80478b1fd5963f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 16:48:40 +0100 Subject: [PATCH 053/113] Update docs --- docs/output.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 4c48e046..0df7863d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -79,7 +79,6 @@ The library-level alignments associated with the same sample are merged and subs Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keepDups` parameter but its generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keepMultiMap`. Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. 
- A selection of alignment-based QC metrics generated by Picard CollectMultipleMetrics and MarkDuplicates will be included in the MultiQC report. ![MultiQC - Picard deduplication stats plot](images/mqc_picard_deduplication_plot.png) @@ -171,7 +170,7 @@ The library-level alignments associated with the same sample are merged and subs ![MultiQC - MACS2 peaks FRiP score plot](images/mqc_frip_score_plot.png) - `` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrowPeak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. Also, the IGV session file and MultiQC reports in the results directory will be overwritten with the latest output so you may want to rename/move these beforehand. + `` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrowPeak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. 
*Output directories*: * `bwa/mergedLibrary/macs//` From b2fdfd82b76262d57478a81913298189b983badf Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 15 Oct 2019 16:51:02 +0100 Subject: [PATCH 054/113] Add peak dir path --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 0df7863d..d2b019d4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -262,7 +262,7 @@ The library-level alignments associated with the same sample are merged and subs The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . *Output directories*: - * `multiqc/` + * `multiqc//` * `multiqc_report.html` - a standalone HTML file that can be viewed in your web browser. * `multiqc_data/` - directory containing parsed statistics from the different tools used in the pipeline. * `multiqc_plots/` - directory containing static images from the report in various formats. @@ -286,7 +286,7 @@ The library-level alignments associated with the same sample are merged and subs ![IGV screenshot](images/igv_screenshot.png) *Output directories*: - * `igv/` + * `igv//` * `igv_session.xml` file. * `igv_files.txt` file containing a listing of the files used to create the IGV session, and their allocated colours. 
From d925efacaac5bebd155de690743865c534453ee3 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 16 Oct 2019 10:49:15 +0100 Subject: [PATCH 055/113] Remove skipMultiQCStats parameter --- docs/usage.md | 2 -- main.nf | 6 +----- nextflow.config | 1 - 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index e9df71a9..d7484874 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -391,8 +391,6 @@ The following options make this easy: | `--skipIGV` | Skip IGV | | `--skipMultiQC` | Skip MultiQC | -`--skipMultiQCStats` allows you to exclude the [general statistics table](https://multiqc.info/docs/#general-statistics-table) from the MultiQC report. - ## Job resources ### Automatic resubmission Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. 
diff --git a/main.nf b/main.nf index 6cf47045..c4d8a056 100755 --- a/main.nf +++ b/main.nf @@ -69,7 +69,6 @@ def helpMessage() { --skipSpp Skip Phantompeakqualtools --skipIGV Skip IGV --skipMultiQC Skip MultiQC - --skipMultiQCStats Exclude general statistics table from MultiQC report Other --outdir The output directory where the results will be saved @@ -243,7 +242,6 @@ if (params.skipPlotFingerprint) summary['Skip plotFingerprint'] = 'Yes' if (params.skipSpp) summary['Skip spp'] = 'Yes' if (params.skipIGV) summary['Skip IGV'] = 'Yes' if (params.skipMultiQC) summary['Skip MultiQC'] = 'Yes' -if (params.skipMultiQCStats) summary['Skip MultiQC Stats'] = 'Yes' summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" summary['Output Dir'] = params.outdir @@ -1484,11 +1482,9 @@ process MultiQC { peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - mqcstats = params.skipMultiQCStats ? '--cl_config "skip_generalstats: true"' : '' """ multiqc . 
-f $rtitle $rfilename --config $multiqc_config \\ - -m custom_content -m fastqc -m cutadapt -m samtools -m picard -m preseq -m featureCounts -m deeptools -m phantompeakqualtools \\ - $mqcstats + -m custom_content -m fastqc -m cutadapt -m samtools -m picard -m preseq -m featureCounts -m deeptools -m phantompeakqualtools """ } diff --git a/nextflow.config b/nextflow.config index 2750dd86..4d643eb3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,7 +48,6 @@ params { skipSpp = false skipIGV = false skipMultiQC = false - skipMultiQCStats = false // Options: AWSBatch awsqueue = false From 23ddbacc8be332b8be65433fd9fb4d19e4751d68 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 16 Oct 2019 11:03:45 +0100 Subject: [PATCH 056/113] Add --trim_nextseq parameter --- docs/usage.md | 18 ++++++++++-------- main.nf | 7 +++++-- nextflow.config | 1 + 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index d7484874..5fde2736 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -328,14 +328,16 @@ Do not load `igenomes.config` when running the pipeline. You may choose this opt The pipeline accepts a number of parameters to change how the trimming is done, according to your data type. You can specify custom trimming parameters as follows: -* `--clip_r1 ` - * Instructs Trim Galore to remove bp from the 5' end of read 1 (for single-end reads). -* `--clip_r2 ` - * Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only). -* `--three_prime_clip_r1 ` - * Instructs Trim Galore to remove bp from the 3' end of read 1 _AFTER_ adapter/quality trimming has been -* `--three_prime_clip_r2 ` - * Instructs Trim Galore to re move bp from the 3' end of read 2 _AFTER_ adapter/quality trimming has been performed. +* `--clip_r1 [int]` + * Instructs Trim Galore to remove [int] bp from the 5' end of read 1 (for single-end reads). 
+* `--clip_r2 [int]` + * Instructs Trim Galore to remove [int] bp from the 5' end of read 2 (paired-end reads only). +* `--three_prime_clip_r1 [int]` + * Instructs Trim Galore to remove [int] bp from the 3' end of read 1 _AFTER_ adapter/quality trimming has been +* `--three_prime_clip_r2 [int]` + * Instructs Trim Galore to remove [int] bp from the 3' end of read 2 _AFTER_ adapter/quality trimming has been performed. +* `--trim_nextseq [int]` + * This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. ### `--skipTrimming` Skip the adapter trimming step. Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data. 
diff --git a/main.nf b/main.nf index c4d8a056..e81e6955 100755 --- a/main.nf +++ b/main.nf @@ -45,6 +45,7 @@ def helpMessage() { --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) + --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) --skipTrimming Skip the adapter trimming step --saveTrimmed Save the trimmed FastQ files in the results directory @@ -223,6 +224,7 @@ if (params.skipTrimming) { summary['Trim R2'] = "$params.clip_r2 bp" summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" + summary["NextSeq Trim"] = "$params.trim_nextseq bp" } if (params.seq_center) summary['Sequencing Center'] = params.seq_center if (params.singleEnd) summary['Fragment Size'] = "$params.fragment_size bp" @@ -515,16 +517,17 @@ if (params.skipTrimming) { c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' if (params.singleEnd) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - trim_galore --fastqc --gzip $c_r1 $tpc_r1 ${name}.fastq.gz + trim_galore --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz """ } else { """ [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz [ ! 
-f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 ${name}_1.fastq.gz ${name}_2.fastq.gz + trim_galore --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${name}_1.fastq.gz ${name}_2.fastq.gz """ } } diff --git a/nextflow.config b/nextflow.config index 4d643eb3..efd75efb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -24,6 +24,7 @@ params { clip_r2 = 0 three_prime_clip_r1 = 0 three_prime_clip_r2 = 0 + trim_nextseq = 0 skipTrimming = false saveTrimmed = false From 1a4239bd56c88d49f47bd45a24b2c1b6a9500d79 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 16 Oct 2019 12:32:35 +0100 Subject: [PATCH 057/113] Fix bug in plot --- bin/plot_peak_intersect.r | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/bin/plot_peak_intersect.r b/bin/plot_peak_intersect.r index 58bb3b33..2404b8b9 100755 --- a/bin/plot_peak_intersect.r +++ b/bin/plot_peak_intersect.r @@ -45,14 +45,28 @@ comb.dat <- read.table(opt$input_file,sep="\t",header=FALSE) comb.vec <- comb.dat[,2] comb.vec <- setNames(comb.vec,comb.dat[,1]) -pdf(opt$output_file,onefile=F,height=10,width=14) - -upset(fromExpression(comb.vec), - sets.bar.color = "#56B4E9", - point.size = 5, - line.size = 2, - order.by = "freq", - text.scale = c(1.7, 1.5, 1.7, 1.5, 1.7, 1.7)) +sets <- sort(unique(unlist(strsplit(names(comb.vec),split='&'))), decreasing = TRUE) +nintersects = length(names(comb.vec)) +if (nintersects > 70) { + nintersects <- 70 +} + +pdf(opt$output_file,onefile=F,height=10,width=20) + +upset( + fromExpression(comb.vec), + nsets = length(sets), + nintersects = nintersects, + sets = sets, + keep.order = TRUE, + sets.bar.color = "#56B4E9", + point.size = 3, + line.size = 1, + mb.ratio = c(0.55, 0.45), + order.by = "freq", + number.angles = 30, + text.scale = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.2) +) dev.off() From 0a3ffb0acf46c5c3df9e690f715aec04386145ac Mon Sep 17 00:00:00 2001 From: 
drpatelh Date: Thu, 17 Oct 2019 17:39:36 +0100 Subject: [PATCH 058/113] Use autoMounts with singularity profile --- nextflow.config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index efd75efb..5eebadd7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -102,7 +102,8 @@ profiles { conda { process.conda = "$baseDir/environment.yml" } debug { process.beforeScript = 'echo $HOSTNAME' } docker { docker.enabled = true } - singularity { singularity.enabled = true } + singularity { singularity.enabled = true + singularity.autoMounts = true } test { includeConfig 'conf/test.config' } } From 87ab87e477925184b8bf7f6e7bf59e27deaa8823 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 10:39:43 +0100 Subject: [PATCH 059/113] Update parameters --- main.nf | 76 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/main.nf b/main.nf index e81e6955..d0bddaa6 100755 --- a/main.nf +++ b/main.nf @@ -26,7 +26,7 @@ def helpMessage() { Available: conda, docker, singularity, awsbatch, test Generic - --singleEnd Specifies that the input is single-end reads + --single_end Specifies that the input is single-end reads --seq_center Sequencing center information to be added to read group of BAM files --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) @@ -38,7 +38,7 @@ def helpMessage() { --tss_bed Path to BED file containing transcription start sites --macs_gsize Effective genome size parameter required by MACS2. 
If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 --blacklist Path to blacklist regions (.BED format), used for filtering alignments - --saveReference If generated by the pipeline save the BWA index in the results directory + --save_reference If generated by the pipeline save the BWA index in the results directory Trimming --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) @@ -46,13 +46,13 @@ def helpMessage() { --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) - --skipTrimming Skip the adapter trimming step - --saveTrimmed Save the trimmed FastQ files in the results directory + --skip_trimming Skip the adapter trimming step + --save_trimmed Save the trimmed FastQ files in the results directory Alignments - --keepDups Duplicate reads are not filtered from alignments - --keepMultiMap Reads mapping to multiple locations are not filtered from alignments - --saveAlignedIntermediates Save the intermediate BAM files from the alignment step - not done by default + --keep_dups Duplicate reads are not filtered from alignments + --keep_multi_map Reads mapping to multiple locations are not filtered from alignments + --save_align_intermeds Save the intermediate BAM files from the alignment step - not done by default Peaks --narrowPeak Run MACS2 in narrowPeak mode @@ -134,7 +134,7 @@ if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) // JSON files required by BAMTools for alignment 
filtering -if (params.singleEnd) { +if (params.single_end) { ch_bamtools_filter_config = file(params.bamtools_filter_se_config, checkIfExists: true) } else { ch_bamtools_filter_config = file(params.bamtools_filter_pe_config, checkIfExists: true) @@ -204,7 +204,7 @@ if (workflow.profile == 'awsbatch') { log.info nfcoreHeader() def summary = [:] summary['Run Name'] = custom_runName ?: workflow.runName -summary['Data Type'] = params.singleEnd ? 'Single-End' : 'Paired-End' +summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End' summary['Design File'] = params.design summary['Genome'] = params.genome ?: 'Not supplied' summary['Fasta File'] = params.fasta @@ -217,7 +217,7 @@ summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' summary['Min Consensus Reps'] = params.min_reps_consensus if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' if (!params.narrowPeak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff -if (params.skipTrimming) { +if (params.skip_trimming) { summary['Trimming Step'] = 'Skipped' } else { summary['Trim R1'] = "$params.clip_r1 bp" @@ -227,13 +227,13 @@ if (params.skipTrimming) { summary["NextSeq Trim"] = "$params.trim_nextseq bp" } if (params.seq_center) summary['Sequencing Center'] = params.seq_center -if (params.singleEnd) summary['Fragment Size'] = "$params.fragment_size bp" +if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" summary['Fingerprint Bins'] = params.fingerprint_bins -if (params.keepDups) summary['Keep Duplicates'] = 'Yes' -if (params.keepMultiMap) summary['Keep Multi-mapped'] = 'Yes' -summary['Save Genome Index'] = params.saveReference ? 
'Yes' : 'No' -if (params.saveTrimmed) summary['Save Trimmed'] = 'Yes' -if (params.saveAlignedIntermediates) summary['Save Intermeds'] = 'Yes' +if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' +if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' +summary['Save Genome Index'] = params.save_reference ? 'Yes' : 'No' +if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' +if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' if (params.saveMACSPileup) summary['Save MACS2 Pileup'] = 'Yes' if (params.skipDiffAnalysis) summary['Skip Diff Analysis'] = 'Yes' if (params.skipFastQC) summary['Skip FastQC'] = 'Yes' @@ -311,7 +311,7 @@ process CheckDesign { /* * Create channels for input fastq files */ -if (params.singleEnd) { +if (params.single_end) { ch_design_reads_csv.splitCsv(header:true, sep:',') .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } .into { ch_raw_reads_fastqc; @@ -345,8 +345,8 @@ if (!params.bwa_index) { process BWAIndex { tag "$fasta" label 'process_high' - publishDir path: { params.saveReference ? "${params.outdir}/reference_genome" : params.outdir }, - saveAs: { params.saveReference ? it : null }, mode: 'copy' + publishDir path: { params.save_reference ? "${params.outdir}/reference_genome" : params.outdir }, + saveAs: { params.save_reference ? it : null }, mode: 'copy' input: file fasta from ch_fasta @@ -461,7 +461,7 @@ process FastQC { script: // Added soft-links to original fastqs for consistent naming in MultiQC - if (params.singleEnd) { + if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz fastqc -q -t $task.cpus ${name}.fastq.gz @@ -487,7 +487,7 @@ process FastQC { /* * STEP 2 - Trim Galore! 
*/ -if (params.skipTrimming) { +if (params.skip_trimming) { ch_trimmed_reads = ch_raw_reads_trimgalore ch_trimgalore_results_mqc = [] ch_trimgalore_fastqc_reports_mqc = [] @@ -500,7 +500,7 @@ if (params.skipTrimming) { if (filename.endsWith(".html")) "fastqc/$filename" else if (filename.endsWith(".zip")) "fastqc/zips/$filename" else if (filename.endsWith("trimming_report.txt")) "logs/$filename" - else params.saveTrimmed ? filename : null + else params.save_trimmed ? filename : null } input: @@ -518,7 +518,7 @@ if (params.skipTrimming) { tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' - if (params.singleEnd) { + if (params.single_end) { """ [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz trim_galore --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz @@ -578,7 +578,7 @@ process BWAMem { process SortBAM { tag "$name" label 'process_medium' - if (params.saveAlignedIntermediates) { + if (params.save_align_intermeds) { publishDir path: "${params.outdir}/bwa/library", mode: 'copy', saveAs: { filename -> if (filename.endsWith(".flagstat")) "samtools_stats/$filename" @@ -631,7 +631,7 @@ process MergeBAM { else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" else if (filename.endsWith(".stats")) "samtools_stats/$filename" else if (filename.endsWith(".metrics.txt")) "picard_metrics/$filename" - else params.saveAlignedIntermediates ? filename : null + else params.save_align_intermeds ? 
filename : null } input: @@ -703,7 +703,7 @@ process MergeBAMFilter { label 'process_medium' publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 'copy', saveAs: { filename -> - if (params.singleEnd || params.saveAlignedIntermediates) { + if (params.single_end || params.save_align_intermeds) { if (filename.endsWith(".flagstat")) "samtools_stats/$filename" else if (filename.endsWith(".idxstats")) "samtools_stats/$filename" else if (filename.endsWith(".stats")) "samtools_stats/$filename" @@ -724,12 +724,12 @@ process MergeBAMFilter { file "*.{idxstats,stats}" into ch_filter_bam_stats_mqc script: - prefix = params.singleEnd ? "${name}.mLb.clN" : "${name}.mLb.flT" - filter_params = params.singleEnd ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" - dup_params = params.keepDups ? "" : "-F 0x0400" - multimap_params = params.keepMultiMap ? "" : "-q 1" + prefix = params.single_end ? "${name}.mLb.clN" : "${name}.mLb.flT" + filter_params = params.single_end ? "-F 0x004" : "-F 0x004 -F 0x0008 -f 0x001" + dup_params = params.keep_dups ? "" : "-F 0x0400" + multimap_params = params.keep_multi_map ? "" : "-q 1" blacklist_params = params.blacklist ? "-L $bed" : "" - name_sort_bam = params.singleEnd ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" + name_sort_bam = params.single_end ? "" : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" """ samtools view \\ $filter_params \\ @@ -753,7 +753,7 @@ process MergeBAMFilter { /* * STEP 4.3 Remove orphan reads from paired-end BAM file */ -if (params.singleEnd) { +if (params.single_end) { ch_filter_bam.into { ch_rm_orphan_bam_metrics; ch_rm_orphan_bam_bigwig; ch_rm_orphan_bam_macs_1; @@ -905,8 +905,8 @@ process BigWig { script: prefix = "${name}.mLb.clN" - pe_fragment = params.singleEnd ? "" : "-pc" - extend = (params.singleEnd && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + pe_fragment = params.single_end ? 
"" : "-pc" + extend = (params.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' """ SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt @@ -1030,7 +1030,7 @@ process PlotFingerprint { file '*.raw.txt' into ch_plotfingerprint_mqc script: - extend = (params.singleEnd && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + extend = (params.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' """ plotFingerprint \\ --bamfiles ${ipbam[0]} ${controlbam[0]} \\ @@ -1076,7 +1076,7 @@ process MACSCallPeak { script: peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" - format = params.singleEnd ? "BAM" : "BAMPE" + format = params.single_end ? "BAM" : "BAMPE" pileup = params.saveMACSPileup ? "-B --SPMR" : "" """ macs2 callpeak \\ @@ -1314,8 +1314,8 @@ process ConsensusPeakSetDESeq { prefix = "${antibody}.consensus_peaks" peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - bam_ext = params.singleEnd ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" - pe_params = params.singleEnd ? '' : "-p --donotsort" + bam_ext = params.single_end ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" + pe_params = params.single_end ? 
'' : "-p --donotsort" """ featureCounts \\ -F SAF \\ From a384642b8ac2e96ba980332be8ddda9a92228245 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 10:59:25 +0100 Subject: [PATCH 060/113] Update params --- main.nf | 92 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/main.nf b/main.nf index d0bddaa6..79748fa1 100755 --- a/main.nf +++ b/main.nf @@ -55,21 +55,21 @@ def helpMessage() { --save_align_intermeds Save the intermediate BAM files from the alignment step - not done by default Peaks - --narrowPeak Run MACS2 in narrowPeak mode - --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrowPeak isnt specified (Default: 0.1) + --narrow_peak Run MACS2 in narrowPeak mode + --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) --min_reps_consensus Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) - --saveMACSPileup Instruct MACS2 to create bedGraph files normalised to signal per million reads - --skipDiffAnalysis Skip differential binding analysis + --save_macs_pileup Instruct MACS2 to create bedGraph files normalised to signal per million reads + --skip_diff_analysis Skip differential binding analysis QC - --skipFastQC Skip FastQC - --skipPicardMetrics Skip Picard CollectMultipleMetrics - --skipPreseq Skip Preseq - --skipPlotProfile Skip deepTools plotProfile - --skipPlotFingerprint Skip deepTools plotFingerprint - --skipSpp Skip Phantompeakqualtools - --skipIGV Skip IGV - --skipMultiQC Skip MultiQC + --skip_fastqc Skip FastQC + --skip_picard_metrics Skip Picard CollectMultipleMetrics + --skip_preseq Skip Preseq + --skip_plot_profile Skip deepTools plotProfile + --skip_plot_fingerprint Skip deepTools plotFingerprint + --skip_spp Skip Phantompeakqualtools + --skip_igv Skip IGV + --skip_multiqc Skip MultiQC Other 
--outdir The output directory where the results will be saved @@ -215,8 +215,8 @@ if (params.bwa_index) summary['BWA Index'] = params.bwa_index if (params.blacklist) summary['Blacklist BED'] = params.blacklist summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' summary['Min Consensus Reps'] = params.min_reps_consensus -if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrowPeak ? 'Yes' : 'No' -if (!params.narrowPeak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff +if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 'Yes' : 'No' +if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff if (params.skip_trimming) { summary['Trimming Step'] = 'Skipped' } else { @@ -234,16 +234,16 @@ if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' summary['Save Genome Index'] = params.save_reference ? 'Yes' : 'No' if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' -if (params.saveMACSPileup) summary['Save MACS2 Pileup'] = 'Yes' -if (params.skipDiffAnalysis) summary['Skip Diff Analysis'] = 'Yes' -if (params.skipFastQC) summary['Skip FastQC'] = 'Yes' -if (params.skipPicardMetrics) summary['Skip Picard Metrics'] = 'Yes' -if (params.skipPreseq) summary['Skip Preseq'] = 'Yes' -if (params.skipPlotProfile) summary['Skip plotProfile'] = 'Yes' -if (params.skipPlotFingerprint) summary['Skip plotFingerprint'] = 'Yes' -if (params.skipSpp) summary['Skip spp'] = 'Yes' -if (params.skipIGV) summary['Skip IGV'] = 'Yes' -if (params.skipMultiQC) summary['Skip MultiQC'] = 'Yes' +if (params.save_macs_pileup) summary['Save MACS2 Pileup'] = 'Yes' +if (params.skip_diff_analysis) summary['Skip Diff Analysis'] = 'Yes' +if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' +if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' +if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' +if (params.skip_plot_profile) summary['Skip 
plotProfile'] = 'Yes' +if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes' +if (params.skip_spp) summary['Skip spp'] = 'Yes' +if (params.skip_igv) summary['Skip IGV'] = 'Yes' +if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" summary['Output Dir'] = params.outdir @@ -451,7 +451,7 @@ process FastQC { } when: - !params.skipFastQC + !params.skip_fastqc input: set val(name), file(reads) from ch_raw_reads_fastqc @@ -824,7 +824,7 @@ process Preseq { publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: 'copy' when: - !params.skipPreseq + !params.skip_preseq input: set val(name), file(bam) from ch_merge_bam_preseq @@ -853,7 +853,7 @@ process CollectMultipleMetrics { } when: - !params.skipPicardMetrics + !params.skip_picard_metrics input: set val(name), file(bam) from ch_rm_orphan_bam_metrics @@ -927,7 +927,7 @@ process PlotProfile { publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: 'copy' when: - !params.skipPlotProfile + !params.skip_plot_profile input: set val(name), file(bigwig) from ch_bigwig_plotprofile @@ -966,7 +966,7 @@ process PhantomPeakQualTools { publishDir "${params.outdir}/bwa/mergedLibrary/phantompeakqualtools", mode: 'copy' when: - !params.skipSpp + !params.skip_spp input: set val(name), file(bam) from ch_rm_orphan_bam_phantompeakqualtools @@ -1020,7 +1020,7 @@ process PlotFingerprint { publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: 'copy' when: - !params.skipPlotFingerprint + !params.skip_plot_fingerprint input: set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), file(ipbam), val(control), file(controlbam), file(ipflagstat) from ch_group_bam_plotfingerprint @@ -1074,10 +1074,10 @@ process MACSCallPeak { file "*_mqc.tsv" into 
ch_macs_mqc script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - broad = params.narrowPeak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" + broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" format = params.single_end ? "BAM" : "BAMPE" - pileup = params.saveMACSPileup ? "-B --SPMR" : "" + pileup = params.save_macs_pileup ? "-B --SPMR" : "" """ macs2 callpeak \\ -t ${ipbam[0]} \\ @@ -1118,7 +1118,7 @@ process AnnotatePeaks { file "*.txt" into ch_macs_annotate script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" """ annotatePeaks.pl \\ $peak \\ @@ -1150,7 +1150,7 @@ process PeakQC { file "*.tsv" into ch_macs_qc_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" """ plot_macs_qc.r \\ -i ${peaks.join(',')} \\ @@ -1209,10 +1209,10 @@ process ConsensusPeakSet { script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ prefix = "${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" - mergecols = params.narrowPeak ? (2..10).join(',') : (2..9).join(',') - collapsecols = params.narrowPeak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') - expandparam = params.narrowPeak ? "--is_narrow_peak" : "" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" + mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') + collapsecols = params.narrow_peak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') + expandparam = params.narrow_peak ? 
"--is_narrow_peak" : "" """ sort -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt @@ -1256,7 +1256,7 @@ process ConsensusPeakSetAnnotate { script: prefix = "${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" """ annotatePeaks.pl \\ $bed \\ @@ -1293,7 +1293,7 @@ process ConsensusPeakSetDESeq { } when: - params.macs_gsize && !params.skipDiffAnalysis && replicatesExist && multipleGroups + params.macs_gsize && !params.skip_diff_analysis && replicatesExist && multipleGroups input: set val(antibody), val(replicatesExist), val(multipleGroups), file(bams) ,file(saf) from ch_group_bam_deseq @@ -1312,7 +1312,7 @@ process ConsensusPeakSetDESeq { script: prefix = "${antibody}.consensus_peaks" - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() bam_ext = params.single_end ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" pe_params = params.single_end ? '' : "-p --donotsort" @@ -1356,7 +1356,7 @@ process IGV { publishDir "${params.outdir}/igv/${peaktype}", mode: 'copy' when: - !params.skipIGV + !params.skip_igv input: file fasta from ch_fasta @@ -1369,7 +1369,7 @@ process IGV { file "*.{txt,xml}" into ch_igv_session script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? 
"narrowPeak" : "broadPeak" """ cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../reference_genome/${fasta.getName()} --path_prefix '../../' @@ -1446,7 +1446,7 @@ process MultiQC { publishDir "${params.outdir}/multiqc/${peaktype}", mode: 'copy' when: - !params.skipMultiQC + !params.skip_multiqc input: file multiqc_config from ch_multiqc_config @@ -1482,7 +1482,7 @@ process MultiQC { file "multiqc_plots" script: - peaktype = params.narrowPeak ? "narrowPeak" : "broadPeak" + peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" rtitle = custom_runName ? "--title \"$custom_runName\"" : '' rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' """ From cfaa1f13d63607ab79e509d96ce14c0ae550362a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:05:09 +0100 Subject: [PATCH 061/113] Update help --- main.nf | 96 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/main.nf b/main.nf index 79748fa1..87eea347 100755 --- a/main.nf +++ b/main.nf @@ -19,68 +19,68 @@ def helpMessage() { nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker Mandatory arguments: - --design Comma-separated file containing information about the samples in the experiment (see docs/usage.md) - --fasta Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome - --gtf Path to GTF file. Not mandatory when using reference in iGenomes config via --genome - -profile Configuration profile to use. Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test + --design [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) + --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome + --gtf [file] Path to GTF file. 
Not mandatory when using reference in iGenomes config via --genome + -profile [str] Configuration profile to use. Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test Generic - --single_end Specifies that the input is single-end reads - --seq_center Sequencing center information to be added to read group of BAM files - --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) - --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) - - References If not specified in the configuration file or you wish to overwrite any of the references - --genome Name of iGenomes reference - --bwa_index Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa - --gene_bed Path to BED file containing gene intervals - --tss_bed Path to BED file containing transcription start sites - --macs_gsize Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 - --blacklist Path to blacklist regions (.BED format), used for filtering alignments - --save_reference If generated by the pipeline save the BWA index in the results directory + --single_end [bool] Specifies that the input is single-end reads + --seq_center [str] Sequencing center information to be added to read group of BAM files + --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) + --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) + + References If not specified in the configuration file or you wish to overwrite any of the references + --genome [str] Name of iGenomes reference + --bwa_index [file] Full path to directory containing BWA index including base name i.e. 
/path/to/index/genome.fa + --gene_bed [file] Path to BED file containing gene intervals + --tss_bed [file] Path to BED file containing transcription start sites + --macs_gsize [float/str] Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 + --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments + --save_reference [bool] If generated by the pipeline save the BWA index in the results directory Trimming - --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) - --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) - --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) - --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) - --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) - --skip_trimming Skip the adapter trimming step - --save_trimmed Save the trimmed FastQ files in the results directory + --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) + --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) + --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) + --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) + --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim 
based on quality after removing poly-G tails (Default: 0) + --skip_trimming [bool] Skip the adapter trimming step + --save_trimmed [bool] Save the trimmed FastQ files in the results directory Alignments - --keep_dups Duplicate reads are not filtered from alignments - --keep_multi_map Reads mapping to multiple locations are not filtered from alignments - --save_align_intermeds Save the intermediate BAM files from the alignment step - not done by default + --keep_dups [bool] Duplicate reads are not filtered from alignments + --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments + --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default Peaks - --narrow_peak Run MACS2 in narrowPeak mode - --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) - --min_reps_consensus Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) - --save_macs_pileup Instruct MACS2 to create bedGraph files normalised to signal per million reads - --skip_diff_analysis Skip differential binding analysis + --narrow_peak [bool] Run MACS2 in narrowPeak mode + --broad_cutoff [float] Specifies broad cutoff value for MACS2. 
Only used when --narrow_peak isnt specified (Default: 0.1) + --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) + --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads + --skip_diff_analysis [bool] Skip differential binding analysis QC - --skip_fastqc Skip FastQC - --skip_picard_metrics Skip Picard CollectMultipleMetrics - --skip_preseq Skip Preseq - --skip_plot_profile Skip deepTools plotProfile - --skip_plot_fingerprint Skip deepTools plotFingerprint - --skip_spp Skip Phantompeakqualtools - --skip_igv Skip IGV - --skip_multiqc Skip MultiQC + --skip_fastqc [bool] Skip FastQC + --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics + --skip_preseq [bool] Skip Preseq + --skip_plot_profile [bool] Skip deepTools plotProfile + --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint + --skip_spp [bool] Skip Phantompeakqualtools + --skip_igv [bool] Skip IGV + --skip_multiqc [bool] Skip MultiQC Other - --outdir The output directory where the results will be saved - --email Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail Same as --email, except only send mail if the workflow is not successful - --max_multiqc_email_size Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name Name for the pipeline run. 
If not specified, Nextflow will automatically generate a random mnemonic + --outdir [file] The output directory where the results will be saved + --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful + --max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic AWSBatch - --awsqueue The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion The AWS Region for your AWS Batch job to run on + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on """.stripIndent() } From 97e8c6d1f001e7a9b965fc6a41aa4dec8e5245a6 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:14:08 +0100 Subject: [PATCH 062/113] Update help --- main.nf | 96 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/main.nf b/main.nf index 87eea347..c1e8d342 100755 --- a/main.nf +++ b/main.nf @@ -19,68 +19,68 @@ def helpMessage() { nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker Mandatory arguments: - --design [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) - --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome - --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome - -profile [str] Configuration profile to use. 
Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test + --design [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) + --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome + --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome + -profile [str] Configuration profile to use. Can use multiple (comma separated) + Available: conda, docker, singularity, awsbatch, test Generic - --single_end [bool] Specifies that the input is single-end reads - --seq_center [str] Sequencing center information to be added to read group of BAM files - --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) - --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) - - References If not specified in the configuration file or you wish to overwrite any of the references - --genome [str] Name of iGenomes reference - --bwa_index [file] Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa - --gene_bed [file] Path to BED file containing gene intervals - --tss_bed [file] Path to BED file containing transcription start sites - --macs_gsize [float/str] Effective genome size parameter required by MACS2. 
If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 - --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments - --save_reference [bool] If generated by the pipeline save the BWA index in the results directory + --single_end [bool] Specifies that the input is single-end reads + --seq_center [str] Sequencing center information to be added to read group of BAM files + --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) + --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) + + References If not specified in the configuration file or you wish to overwrite any of the references + --genome [str] Name of iGenomes reference + --bwa_index [file] Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa + --gene_bed [file] Path to BED file containing gene intervals + --tss_bed [file] Path to BED file containing transcription start sites + --macs_gsize [str] Effective genome size parameter required by MACS2. 
If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 + --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments + --save_reference [bool] If generated by the pipeline save the BWA index in the results directory Trimming - --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) - --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) - --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) - --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) - --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) - --skip_trimming [bool] Skip the adapter trimming step - --save_trimmed [bool] Save the trimmed FastQ files in the results directory + --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) + --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) + --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) + --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) + --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails (Default: 0) + --skip_trimming [bool] Skip the adapter trimming step + --save_trimmed [bool] Save the trimmed FastQ files in the results directory Alignments - --keep_dups [bool] 
Duplicate reads are not filtered from alignments - --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments - --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default + --keep_dups [bool] Duplicate reads are not filtered from alignments + --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments + --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default Peaks - --narrow_peak [bool] Run MACS2 in narrowPeak mode - --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) - --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) - --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads - --skip_diff_analysis [bool] Skip differential binding analysis + --narrow_peak [bool] Run MACS2 in narrowPeak mode + --broad_cutoff [float] Specifies broad cutoff value for MACS2. 
Only used when --narrow_peak isnt specified (Default: 0.1) + --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) + --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads + --skip_diff_analysis [bool] Skip differential binding analysis QC - --skip_fastqc [bool] Skip FastQC - --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics - --skip_preseq [bool] Skip Preseq - --skip_plot_profile [bool] Skip deepTools plotProfile - --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint - --skip_spp [bool] Skip Phantompeakqualtools - --skip_igv [bool] Skip IGV - --skip_multiqc [bool] Skip MultiQC + --skip_fastqc [bool] Skip FastQC + --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics + --skip_preseq [bool] Skip Preseq + --skip_plot_profile [bool] Skip deepTools plotProfile + --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint + --skip_spp [bool] Skip Phantompeakqualtools + --skip_igv [bool] Skip IGV + --skip_multiqc [bool] Skip MultiQC Other - --outdir [file] The output directory where the results will be saved - --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits - --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful - --max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name [str] Name for the pipeline run. 
If not specified, Nextflow will automatically generate a random mnemonic + --outdir [file] The output directory where the results will be saved + --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits + --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful + --max_multiqc_email_size [str] Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) + -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic AWSBatch - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on + --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch + --awsregion [str] The AWS Region for your AWS Batch job to run on """.stripIndent() } From 0e94e161c3d87d8420ab231df2419c7f2e599554 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:19:50 +0100 Subject: [PATCH 063/113] Update header log info --- main.nf | 88 ++++++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/main.nf b/main.nf index c1e8d342..7bf6a618 100755 --- a/main.nf +++ b/main.nf @@ -203,66 +203,66 @@ if (workflow.profile == 'awsbatch') { // Header log info log.info nfcoreHeader() def summary = [:] -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Data Type'] = params.single_end ? 
'Single-End' : 'Paired-End' -summary['Design File'] = params.design -summary['Genome'] = params.genome ?: 'Not supplied' -summary['Fasta File'] = params.fasta -summary['GTF File'] = params.gtf -if (params.gene_bed) summary['Gene BED File'] = params.gene_bed -if (params.tss_bed) summary['TSS BED File'] = params.tss_bed -if (params.bwa_index) summary['BWA Index'] = params.bwa_index -if (params.blacklist) summary['Blacklist BED'] = params.blacklist -summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' -summary['Min Consensus Reps'] = params.min_reps_consensus -if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 'Yes' : 'No' -if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff +summary['Run Name'] = custom_runName ?: workflow.runName +summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End' +summary['Design File'] = params.design +summary['Genome'] = params.genome ?: 'Not supplied' +summary['Fasta File'] = params.fasta +summary['GTF File'] = params.gtf +if (params.gene_bed) summary['Gene BED File'] = params.gene_bed +if (params.tss_bed) summary['TSS BED File'] = params.tss_bed +if (params.bwa_index) summary['BWA Index'] = params.bwa_index +if (params.blacklist) summary['Blacklist BED'] = params.blacklist +summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' +summary['Min Consensus Reps'] = params.min_reps_consensus +if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 
'Yes' : 'No' +if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff if (params.skip_trimming) { - summary['Trimming Step'] = 'Skipped' + summary['Trimming Step'] = 'Skipped' } else { - summary['Trim R1'] = "$params.clip_r1 bp" - summary['Trim R2'] = "$params.clip_r2 bp" - summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" - summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" - summary["NextSeq Trim"] = "$params.trim_nextseq bp" + summary['Trim R1'] = "$params.clip_r1 bp" + summary['Trim R2'] = "$params.clip_r2 bp" + summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" + summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" + summary["NextSeq Trim"] = "$params.trim_nextseq bp" } -if (params.seq_center) summary['Sequencing Center'] = params.seq_center -if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" -summary['Fingerprint Bins'] = params.fingerprint_bins -if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' +if (params.seq_center) summary['Sequencing Center'] = params.seq_center +if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" +summary['Fingerprint Bins'] = params.fingerprint_bins +if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' -summary['Save Genome Index'] = params.save_reference ? 'Yes' : 'No' -if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' -if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' +summary['Save Genome Index'] = params.save_reference ? 
'Yes' : 'No' +if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' +if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' if (params.save_macs_pileup) summary['Save MACS2 Pileup'] = 'Yes' if (params.skip_diff_analysis) summary['Skip Diff Analysis'] = 'Yes' -if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' +if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' -if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' +if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' if (params.skip_plot_profile) summary['Skip plotProfile'] = 'Yes' if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes' -if (params.skip_spp) summary['Skip spp'] = 'Yes' -if (params.skip_igv) summary['Skip IGV'] = 'Yes' -if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' -summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" +if (params.skip_spp) summary['Skip spp'] = 'Yes' +if (params.skip_igv) summary['Skip IGV'] = 'Yes' +if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' +summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output Dir'] = params.outdir -summary['Launch Dir'] = workflow.launchDir -summary['Working Dir'] = workflow.workDir -summary['Script Dir'] = workflow.projectDir -summary['User'] = workflow.userName +summary['Output Dir'] = params.outdir +summary['Launch Dir'] = workflow.launchDir +summary['Working Dir'] = workflow.workDir +summary['Script Dir'] = workflow.projectDir +summary['User'] = workflow.userName if (workflow.profile == 'awsbatch') { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } -summary['Config Profile'] 
= workflow.profile +summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC Max Size'] = params.max_multiqc_email_size + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC Max Size'] = params.max_multiqc_email_size } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" From 9e6f1e188a488e790fe3b5334c59f985f4acb357 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:48:31 +0100 Subject: [PATCH 064/113] Update params --- nextflow.config | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/nextflow.config b/nextflow.config index 5eebadd7..146e1abc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,7 +9,7 @@ params { // Options: Generic - singleEnd = false + single_end = false seq_center = false fragment_size = 200 fingerprint_bins = 500000 @@ -17,7 +17,7 @@ params { // Options: References genome = false tss_bed = false - saveReference = false + save_reference = false // Options: Trimming clip_r1 = 0 @@ -25,30 +25,30 @@ params { three_prime_clip_r1 = 0 three_prime_clip_r2 = 0 trim_nextseq = 0 - skipTrimming = false - saveTrimmed = false + skip_trimming = false + save_trimmed = false // Options: Alignments - keepDups = false - keepMultiMap = false - saveAlignedIntermediates = false + keep_dups = false + keep_multi_map = false + save_align_intermeds = false // Options: Peaks - narrowPeak = false + narrow_peak = false 
broad_cutoff = 0.1 min_reps_consensus = 1 - saveMACSPileup = false - skipDiffAnalysis = false + save_macs_pileup = false + skip_diff_analysis = false // Options: QC - skipFastQC = false - skipPicardMetrics = false - skipPreseq = false - skipPlotProfile = false - skipPlotFingerprint = false - skipSpp = false - skipIGV = false - skipMultiQC = false + skip_fastqc = false + skip_picard_metrics = false + skip_preseq = false + skip_plot_profile = false + skip_plot_fingerprint = false + skip_spp = false + skip_igv = false + skip_multiqc = false // Options: AWSBatch awsqueue = false @@ -70,7 +70,7 @@ params { help = false outdir = './results' igenomes_base = "./iGenomes" - igenomesIgnore = false + igenomes_ignore = false max_multiqc_email_size = 25.MB tracedir = "${params.outdir}/pipeline_info" email = false @@ -113,7 +113,7 @@ profiles { docker.runOptions = '-u \$(id -u):\$(id -g)' // Load igenomes.config if required -if (!params.igenomesIgnore) { +if (!params.igenomes_ignore) { includeConfig 'conf/igenomes.config' } From d8b1aa9bace5f9da6ec966208f89650ab97377bf Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:52:23 +0100 Subject: [PATCH 065/113] Export Conda env in Docker --- Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/Dockerfile b/Dockerfile index e1fc9efe..16b864c9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,4 +4,5 @@ LABEL authors="Philip Ewels" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a +RUN conda env export --name nf-core-chipseq-1.0.1dev > nf-core-chipseq-1.0.1dev.yml ENV PATH /opt/conda/envs/nf-core-chipseq-1.0.1dev/bin:$PATH From 7cdb174f75c3e9fcd6b53e46f29281ea04465459 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 11:55:00 +0100 Subject: [PATCH 066/113] Update CHANGELOG --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3793bebe..e2451a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ 
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Add quick start information to main README * Update template to tools `1.7` * Bump Nextflow version to `19.04.0` +* Change all parameters from `camelCase` to `snake_case` ### `Fixed` @@ -23,7 +24,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures -* Change parameter `saveGenomeIndex` to `saveReference` +* Change parameter `saveGenomeIndex` to `save_reference` ### `Dependencies` From 53bbd4d8eb499abdc3b03d624d53dd089412406d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 12:17:37 +0100 Subject: [PATCH 067/113] Update output docs --- docs/output.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/output.md b/docs/output.md index d2b019d4..ba3db0cb 100644 --- a/docs/output.md +++ b/docs/output.md @@ -39,7 +39,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam *Output directories*: * `trim_galore/` - If `--saveTrimmed` is specified FastQ files **after** adapter trimming will be placed in this directory. + If `--save_trimmed` is specified FastQ files **after** adapter trimming will be placed in this directory. * `trim_galore/logs/` `*.log` files generated by Trim Galore!. * `trim_galore/fastqc/` @@ -53,7 +53,7 @@ The initial QC and alignments are performed at the library-level e.g. if the sam [BWA](http://bio-bwa.sourceforge.net/bwa.shtml), [SAMtools](http://samtools.sourceforge.net/) *Description*: - Adapter-trimmed reads are mapped to the reference assembly using BWA. 
A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--saveReference` parameter to save the indices for future pipeline runs, reducing processing times. + Adapter-trimmed reads are mapped to the reference assembly using BWA. A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--save_reference` parameter to save the indices for future pipeline runs, reducing processing times. ![MultiQC - SAMtools stats plot](images/mqc_samtools_stats_plot.png) @@ -77,7 +77,7 @@ The library-level alignments associated with the same sample are merged and subs *Description*: Picard MergeSamFiles and MarkDuplicates are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step isnt carried out because the library-level and merged library-level BAM files will be exactly the same. - Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keepDups` parameter but its generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keepMultiMap`. 
Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. + Read duplicate marking is carried out using the Picard MarkDuplicates command. Duplicate reads are generally removed from the aligned reads to mitigate for fragments in the library that may have been sequenced more than once due to PCR biases. There is an option to keep duplicate reads with the `--keep_dups` parameter but its generally recommended to remove them to avoid the wrong interpretation of the results. A similar option has been provided to keep reads that are multi-mapped - `--keep_multi_map`. Other steps have been incorporated into the pipeline to filter the resulting alignments - see [`main README.md`](../README.md) for a more comprehensive listing, and the tools used at each step. A selection of alignment-based QC metrics generated by Picard CollectMultipleMetrics and MarkDuplicates will be included in the MultiQC report. @@ -158,7 +158,7 @@ The library-level alignments associated with the same sample are merged and subs [MACS2](https://github.com/taoliu/MACS), [HOMER](http://homer.ucsd.edu/homer/ngs/annotation.html) *Description*: - MACS2 is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrowPeak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/taoliu/MACS#output-files) for a description of the output files generated by MACS2. + MACS2 is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. 
See [MACS2 outputs](https://github.com/taoliu/MACS#output-files) for a description of the output files generated by MACS2. ![MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) @@ -170,7 +170,7 @@ The library-level alignments associated with the same sample are merged and subs ![MultiQC - MACS2 peaks FRiP score plot](images/mqc_frip_score_plot.png) - `` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrowPeak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. + `` in the directory structure below corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline. *Output directories*: * `bwa/mergedLibrary/macs//` @@ -304,7 +304,7 @@ The library-level alignments associated with the same sample are merged and subs * `reference_genome/` A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. 
* `reference_genome/BWAIndex/` - If the `--saveReference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. + If the `--save_reference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. 2. **Pipeline information** From 6d1194a12fdfff81536c9b082c39422bfc4a1de4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 12:30:52 +0100 Subject: [PATCH 068/113] Update usage docs --- docs/usage.md | 72 +++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 5fde2736..54ccb504 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -13,7 +13,7 @@ * [`-profile`](#-profile) * [`--design`](#--design) * [Generic arguments](#generic-arguments) - * [`--singleEnd`](#--singleend) + * [`--single_end`](#--single_end) * [`--seq_center`](#--seq_center) * [`--fragment_size`](#--fragment_size) * [`--fingerprint_bins`](#--fingerprint_bins) @@ -26,21 +26,21 @@ * [`--tss_bed`](#--tss_bed) * [`--macs_gsize`](#--macs_gsize) * [`--blacklist`](#--blacklist) - * [`--saveReference`](#--saveReference) - * [`--igenomesIgnore`](#--igenomesignore) + * [`--save_reference`](#--save_reference) + * [`--igenomes_ignore`](#--igenomes_ignore) * [Adapter trimming](#adapter-trimming) - * [`--skipTrimming`](#--skiptrimming) - * [`--saveTrimmed`](#--savetrimmed) + * [`--skip_trimming`](#--skip_trimming) + * [`--save_trimmed`](#--save_trimmed) * [Alignments](#alignments) - * [`--keepDups`](#--keepdups) - * [`--keepMultiMap`](#--keepmultimap) - * [`--saveAlignedIntermediates`](#--savealignedintermediates) + * [`--keep_dups`](#--keep_dups) + * 
[`--keep_multi_map`](#--keep_multi_map) + * [`--save_align_intermeds`](#--save_align_intermeds) * [Peaks](#peaks) - * [`--narrowPeak`](#--narrowpeak) + * [`--narrow_peak`](#--narrow_peak) * [`--broad_cutoff`](#--broad_cutoff) * [`--min_reps_consensus`](#--min_reps_consensus) - * [`--saveMACSPileup`](#--savemacspileup) - * [`--skipDiffAnalysis`](#--skipdiffanalysis) + * [`--save_macs_pileup`](#--save_macs_pileup) + * [`--skip_diff_analysis`](#--skip_diff_analysis) * [Skipping QC steps](#skipping-qc-steps) * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) @@ -216,8 +216,8 @@ Example design files have been provided with the pipeline for [paired-end](../as ## Generic arguments -### `--singleEnd` -By default, the pipeline expects paired-end data. If you have single-end data, specify `--singleEnd` on the command line when you launch the pipeline. +### `--single_end` +By default, the pipeline expects paired-end data. If you have single-end data, specify `--single_end` on the command line when you launch the pipeline. It is not possible to run a mixture of single-end and paired-end files in one run. @@ -270,7 +270,7 @@ params { ``` ### `--fasta` -Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--saveReference` to save BWA index for future runs. +Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs. 
```bash --fasta '[path to FASTA reference]' @@ -318,10 +318,10 @@ If provided, alignments that overlap with the regions in this file will be filte --blacklist '[path to blacklisted regions]' ``` -### `--saveReference` +### `--save_reference` If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times. -### `--igenomesIgnore` +### `--igenomes_ignore` Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. ## Adapter trimming @@ -339,30 +339,30 @@ You can specify custom trimming parameters as follows: * `--trim_nextseq [int]` * This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. -### `--skipTrimming` +### `--skip_trimming` Skip the adapter trimming step. Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data. -### `--saveTrimmed` +### `--save_trimmed` By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete. ## Alignments - -### `--keepDups` +narrow_peak +### `--keep_dups` Duplicate reads are not filtered from alignments. -### `--keepMultiMap` +### `--keep_multi_map` Reads mapping to multiple locations in the genome are not filtered from alignments. -### `--saveAlignedIntermediates` +### `--save_align_intermeds` By default, intermediate BAM files will not be saved. 
The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set to true to also save other intermediate BAM files. ## Peaks -### `--narrowPeak` +### `--narrow_peak` MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode. ### `--broad_cutoff` -Specifies broad cut-off value for MACS2. Only used when `--narrowPeak` isnt specified. Default: 0.1 +Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isnt specified. Default: 0.1 ### `--min_reps_consensus` Number of biological replicates required from a given condition for a peak to contribute to a consensus peak . If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a "reproducible" set of consensus of peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded. @@ -371,10 +371,10 @@ Number of biological replicates required from a given condition for a peak to co -- min_reps_consensus 1 ``` -### `--saveMACSPileup` +### `--save_macs_pileup` Instruct MACS2 to create bedGraph files using the `-B --SPMR` parameters. -### `--skipDiffAnalysis` +### `--skip_diff_analysis` Skip read counting and differential analysis step. ## Skipping QC steps @@ -382,16 +382,16 @@ Skip read counting and differential analysis step. The pipeline contains a large number of quality control steps. Sometimes, it may not be desirable to run all of them if time and compute resources are limited. 
The following options make this easy: -| Step | Description | -|-------------------------|------------------------------------| -| `--skipFastQC` | Skip FastQC | -| `--skipPicardMetrics` | Skip Picard CollectMultipleMetrics | -| `--skipPreseq` | Skip Preseq | -| `--skipPlotProfile` | Skip deepTools plotProfile | -| `--skipPlotFingerprint` | Skip deepTools plotFingerprint | -| `--skipSpp` | Skip Phantompeakqualtools | -| `--skipIGV` | Skip IGV | -| `--skipMultiQC` | Skip MultiQC | +| Step | Description | +|---------------------------|------------------------------------| +| `--skip_fastqc` | Skip FastQC | +| `--skip_picard_metrics` | Skip Picard CollectMultipleMetrics | +| `--skip_preseq` | Skip Preseq | +| `--skip_plot_profile` | Skip deepTools plotProfile | +| `--skip_plot_fingerprint` | Skip deepTools plotFingerprint | +| `--skip_spp` | Skip Phantompeakqualtools | +| `--skip_igv` | Skip IGV | +| `--skip_multiqc` | Skip MultiQC | ## Job resources ### Automatic resubmission From bc77279892172dfb018cb2e02c36de9dd9c25ca2 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 12:40:35 +0100 Subject: [PATCH 069/113] Change --design to --input --- CHANGELOG.md | 1 + README.md | 2 +- conf/test.config | 2 +- docs/usage.md | 8 ++++---- main.nf | 10 +++++----- 5 files changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e2451a41..f8dca341 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Update template to tools `1.7` * Bump Nextflow version to `19.04.0` * Change all parameters from `camelCase` to `snake_case` +* Change `--design` parameter to `--input` for standardisation ### `Fixed` diff --git a/README.md b/README.md index 2685a359..db6bca9a 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ nextflow run nf-core/chipseq -profile test, iv. Start running your own analysis! 
```bash -nextflow run nf-core/chipseq -profile --design design.csv --genome GRCh37 +nextflow run nf-core/chipseq -profile --input design.csv --genome GRCh37 ``` See [usage docs](docs/usage.md) for all of the available options when running the pipeline. diff --git a/conf/test.config b/conf/test.config index 52f17e7f..2fdcb4e8 100644 --- a/conf/test.config +++ b/conf/test.config @@ -16,7 +16,7 @@ params { max_time = 12.h // Input data - design = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design.csv' // Genome references fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' diff --git a/docs/usage.md b/docs/usage.md index 54ccb504..61a1a214 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,7 +11,7 @@ * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) * [`-profile`](#-profile) - * [`--design`](#--design) + * [`--input`](#--input) * [Generic arguments](#generic-arguments) * [`--single_end`](#--single_end) * [`--seq_center`](#--seq_center) @@ -80,7 +80,7 @@ NXF_OPTS='-Xms1g -Xmx4g' The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker +nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -131,11 +131,11 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * A profile with a complete configuration for automated testing * Includes links to test data so needs no other parameters -### `--design` +### `--input` You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. 
It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. ```bash ---design '[path to design file]' +--input '[path to design file]' ``` #### Multiple replicates diff --git a/main.nf b/main.nf index 7bf6a618..79958701 100755 --- a/main.nf +++ b/main.nf @@ -16,10 +16,10 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/chipseq --design design.csv --genome GRCh37 -profile docker + nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker Mandatory arguments: - --design [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) + --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome -profile [str] Configuration profile to use. Can use multiple (comma separated) @@ -156,7 +156,7 @@ ch_spp_rsc_header = file("$baseDir/assets/multiqc/spp_rsc_header.txt", checkIfEx //////////////////////////////////////////////////// // Validate inputs -if (params.design) { ch_design = file(params.design, checkIfExists: true) } else { exit 1, "Samples design file not specified!" } +if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, "Samples design file not specified!" } if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, "GTF annotation file not specified!" } if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } if (params.tss_bed) { ch_tss_bed = file(params.tss_bed, checkIfExists: true) } @@ -205,7 +205,7 @@ log.info nfcoreHeader() def summary = [:] summary['Run Name'] = custom_runName ?: workflow.runName summary['Data Type'] = params.single_end ? 
'Single-End' : 'Paired-End' -summary['Design File'] = params.design +summary['Design File'] = params.input summary['Genome'] = params.genome ?: 'Not supplied' summary['Fasta File'] = params.fasta summary['GTF File'] = params.gtf @@ -296,7 +296,7 @@ process CheckDesign { publishDir "${params.outdir}/pipeline_info", mode: 'copy' input: - file design from ch_design + file design from ch_input output: file "design_reads.csv" into ch_design_reads_csv From aa17e70a4302812cf35d51b0e37cbca4409bf18b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 13:12:40 +0100 Subject: [PATCH 070/113] Update CHANGELOG --- CHANGELOG.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f8dca341..1b58b332 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,9 +13,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Capitalised process names * Add quick start information to main README * Update template to tools `1.7` -* Bump Nextflow version to `19.04.0` -* Change all parameters from `camelCase` to `snake_case` -* Change `--design` parameter to `--input` for standardisation +* Add `--trim_nextseq` parameter ### `Fixed` @@ -26,9 +24,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures * Change parameter `saveGenomeIndex` to `save_reference` +* Change parameter `--design` to `--input` +* Change all parameters from `camelCase` to `snake_case` +* Fixed bug in UpSetR peak intersection plot ### `Dependencies` +* Bump Nextflow version to `19.04.0` ## [1.0.0] - 2019-06-06 From 14fbf7f0d448085483c77c30ae7ea82a5327c146 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 13:41:33 +0100 Subject: [PATCH 071/113] Update indents for channels --- main.nf | 113 ++++++++++++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 49 deletions(-) diff --git a/main.nf b/main.nf index 79958701..5d36ccfa 100755 --- a/main.nf +++ b/main.nf @@ -174,8 +174,9 @@ if (params.bwa_index) { lastPath = params.bwa_index.lastIndexOf(File.separator) bwa_dir = params.bwa_index.substring(0,lastPath+1) bwa_base = params.bwa_index.substring(lastPath+1) - ch_bwa_index = Channel + Channel .fromPath(bwa_dir, checkIfExists: true) + .set { ch_bwa_index } } //////////////////////////////////////////////////// @@ -312,23 +313,26 @@ process CheckDesign { * Create channels for input fastq files */ if (params.single_end) { - ch_design_reads_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } + ch_design_reads_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } + .into { ch_raw_reads_fastqc; + ch_raw_reads_trimgalore } } else { - ch_design_reads_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } + ch_design_reads_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, [ 
file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } + .into { ch_raw_reads_fastqc; + ch_raw_reads_trimgalore } } /* * Create a channel with [sample_id, control id, antibody, replicatesExist, multipleGroups] */ - ch_design_controls_csv.splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), row.multipleGroups.toBoolean() ] } - .set { ch_design_controls_csv } +ch_design_controls_csv + .splitCsv(header:true, sep:',') + .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), row.multipleGroups.toBoolean() ] } + .set { ch_design_controls_csv } /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// @@ -617,10 +621,11 @@ process SortBAM { /* * STEP 4.1 Merge BAM files for all libraries from same replicate */ -ch_sort_bam_merge.map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } - .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ] } - .set { ch_sort_bam_merge } +ch_sort_bam_merge + .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } + .groupTuple(by: [0]) + .map { it -> [ it[0], it[1].flatten() ] } + .set { ch_sort_bam_merge } process MergeBAM { tag "$name" @@ -754,16 +759,21 @@ process MergeBAMFilter { * STEP 4.3 Remove orphan reads from paired-end BAM file */ if (params.single_end) { - ch_filter_bam.into { ch_rm_orphan_bam_metrics; - ch_rm_orphan_bam_bigwig; - ch_rm_orphan_bam_macs_1; - ch_rm_orphan_bam_macs_2; - ch_rm_orphan_bam_phantompeakqualtools; - ch_rm_orphan_name_bam_counts } - ch_filter_bam_flagstat.into { ch_rm_orphan_flagstat_bigwig; - ch_rm_orphan_flagstat_macs; - ch_rm_orphan_flagstat_mqc } - ch_filter_bam_stats_mqc.set { ch_rm_orphan_stats_mqc } + ch_filter_bam + .into { ch_rm_orphan_bam_metrics; + ch_rm_orphan_bam_bigwig; + ch_rm_orphan_bam_macs_1; + ch_rm_orphan_bam_macs_2; + 
ch_rm_orphan_bam_phantompeakqualtools; + ch_rm_orphan_name_bam_counts } + + ch_filter_bam_flagstat + .into { ch_rm_orphan_flagstat_bigwig; + ch_rm_orphan_flagstat_macs; + ch_rm_orphan_flagstat_mqc } + + ch_filter_bam_stats_mqc + .set { ch_rm_orphan_stats_mqc } } else { process MergeBAMRemoveOrphan { tag "$name" @@ -1001,15 +1011,18 @@ process PhantomPeakQualTools { /////////////////////////////////////////////////////////////////////////////// // Create channel linking IP bams with control bams -ch_rm_orphan_bam_macs_1.combine(ch_rm_orphan_bam_macs_2) - .set { ch_rm_orphan_bam_macs_1 } -ch_design_controls_csv.combine(ch_rm_orphan_bam_macs_1) - .filter { it[0] == it[5] && it[1] == it[7] } - .join(ch_rm_orphan_flagstat_macs) - .map { it -> it[2..-1] } - .into { ch_group_bam_macs; - ch_group_bam_plotfingerprint; - ch_group_bam_deseq } +ch_rm_orphan_bam_macs_1 + .combine(ch_rm_orphan_bam_macs_2) + .set { ch_rm_orphan_bam_macs_1 } + +ch_design_controls_csv + .combine(ch_rm_orphan_bam_macs_1) + .filter { it[0] == it[5] && it[1] == it[7] } + .join(ch_rm_orphan_flagstat_macs) + .map { it -> it[2..-1] } + .into { ch_group_bam_macs; + ch_group_bam_plotfingerprint; + ch_group_bam_deseq } /* * STEP 6.1 deepTools plotFingerprint @@ -1177,10 +1190,11 @@ process PeakQC { /////////////////////////////////////////////////////////////////////////////// // group by ip from this point and carry forward boolean variables -ch_macs_consensus.map { it -> [ it[0], it[1], it[2], it[-1] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].sort() ] } - .set { ch_macs_consensus } +ch_macs_consensus + .map { it -> [ it[0], it[1], it[2], it[-1] ] } + .groupTuple() + .map { it -> [ it[0], it[1][0], it[2][0], it[3].sort() ] } + .set { ch_macs_consensus } /* * STEP 7.1 Consensus peaks across samples, create boolean filtering file, .saf file for featureCounts and UpSetR plot for intersection @@ -1272,13 +1286,14 @@ process ConsensusPeakSetAnnotate { } // get bam and saf files for 
each ip -ch_group_bam_deseq.map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } - .join(ch_rm_orphan_name_bam_counts) - .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] } - .join(ch_macs_consensus_saf) - .set { ch_group_bam_deseq } +ch_group_bam_deseq + .map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } + .join(ch_rm_orphan_name_bam_counts) + .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] } + .groupTuple() + .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] } + .join(ch_macs_consensus_saf) + .set { ch_group_bam_deseq } /* * STEP 7.3 Count reads in consensus peaks with featureCounts and perform differential analysis with DESeq2 @@ -1604,13 +1619,13 @@ workflow.onComplete { } // Write summary e-mail HTML to a file - def output_d = new File( "${params.outdir}/pipeline_info/" ) + def output_d = new File("${params.outdir}/pipeline_info/") if (!output_d.exists()) { output_d.mkdirs() } - def output_hf = new File( output_d, "pipeline_report.html" ) + def output_hf = new File(output_d, "pipeline_report.html") output_hf.withWriter { w -> w << email_html } - def output_tf = new File( output_d, "pipeline_report.txt" ) + def output_tf = new File(output_d, "pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } c_reset = params.monochrome_logs ? '' : "\033[0m"; From d591a96a53151e646adaf137c3ccf51188ce1be3 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 13:52:31 +0100 Subject: [PATCH 072/113] Make peaktype global variable --- main.nf | 43 +++++++++++++++++++------------------------ 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/main.nf b/main.nf index 5d36ccfa..1413bfb2 100755 --- a/main.nf +++ b/main.nf @@ -119,6 +119,9 @@ params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false params.macs_gsize = params.genome ? 
params.genomes[ params.genome ].macs_gsize ?: false : false params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false +// Global variables +def PEAK_TYPE = params.narrow_peak ? "narrowPeak" : "broadPeak" + // Has the run name been specified by the user? // this has the bonus effect of catching both -name and --name custom_runName = params.name @@ -1065,7 +1068,7 @@ process PlotFingerprint { process MACSCallPeak { tag "${ip} vs ${control}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy', + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: 'copy', saveAs: { filename -> if (filename.endsWith(".tsv")) "qc/$filename" else if (filename.endsWith(".igv.txt")) null @@ -1082,12 +1085,11 @@ process MACSCallPeak { output: set val(ip), file("*.{bed,xls,gappedPeak,bdg}") into ch_macs_output - set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), file("*.$peaktype") into ch_macs_homer, ch_macs_qc, ch_macs_consensus + set val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), file("*.$PEAK_TYPE") into ch_macs_homer, ch_macs_qc, ch_macs_consensus file "*igv.txt" into ch_macs_igv file "*_mqc.tsv" into ch_macs_mqc script: - peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" format = params.single_end ? "BAM" : "BAMPE" pileup = params.save_macs_pileup ? 
"-B --SPMR" : "" @@ -1102,12 +1104,12 @@ process MACSCallPeak { $pileup \\ --keep-dup all - cat ${ip}_peaks.${peaktype} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv + cat ${ip}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv - READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${peaktype} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') + READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') grep 'mapped (' $ipflagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${ip}", a/\$1}' | cat $frip_score_header - > ${ip}_peaks.FRiP_mqc.tsv - find * -type f -name "*.${peaktype}" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/"{}"\\t0,0,178" \\; > ${ip}_peaks.igv.txt + find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > ${ip}_peaks.igv.txt """ } @@ -1117,7 +1119,7 @@ process MACSCallPeak { process AnnotatePeaks { tag "${ip} vs ${control}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}", mode: 'copy' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: 'copy' when: params.macs_gsize @@ -1131,7 +1133,6 @@ process AnnotatePeaks { file "*.txt" into ch_macs_annotate script: - peaktype = params.narrow_peak ? 
"narrowPeak" : "broadPeak" """ annotatePeaks.pl \\ $peak \\ @@ -1148,7 +1149,7 @@ process AnnotatePeaks { */ process PeakQC { label "process_medium" - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/qc", mode: 'copy' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/qc", mode: 'copy' when: params.macs_gsize @@ -1163,11 +1164,10 @@ process PeakQC { file "*.tsv" into ch_macs_qc_mqc script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" """ plot_macs_qc.r \\ -i ${peaks.join(',')} \\ - -s ${peaks.join(',').replaceAll("_peaks.${peaktype}","")} \\ + -s ${peaks.join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ -o ./ \\ -p macs_peak @@ -1202,7 +1202,7 @@ ch_macs_consensus process ConsensusPeakSet { tag "${antibody}" label 'process_long' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy', + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: 'copy', saveAs: { filename -> if (filename.endsWith(".igv.txt")) null else filename @@ -1223,7 +1223,6 @@ process ConsensusPeakSet { script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ prefix = "${antibody}.consensus_peaks" - peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') collapsecols = params.narrow_peak ? (["collapse"]*9).join(',') : (["collapse"]*8).join(',') expandparam = params.narrow_peak ? 
"--is_narrow_peak" : "" @@ -1232,7 +1231,7 @@ process ConsensusPeakSet { | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt macs2_merged_expand.py ${prefix}.txt \\ - ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peaktype}","")} \\ + ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ ${prefix}.boolean.txt \\ --min_replicates $params.min_reps_consensus \\ $expandparam @@ -1244,7 +1243,7 @@ process ConsensusPeakSet { plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf - find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt + find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt """ } @@ -1254,7 +1253,7 @@ process ConsensusPeakSet { process ConsensusPeakSetAnnotate { tag "${antibody}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}", mode: 'copy' + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: 'copy' when: params.macs_gsize && (replicatesExist || multipleGroups) @@ -1270,7 +1269,6 @@ process ConsensusPeakSetAnnotate { script: prefix = "${antibody}.consensus_peaks" - peaktype = params.narrow_peak ? 
"narrowPeak" : "broadPeak" """ annotatePeaks.pl \\ $bed \\ @@ -1301,7 +1299,7 @@ ch_group_bam_deseq process ConsensusPeakSetDESeq { tag "${antibody}" label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2", mode: 'copy', + publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2", mode: 'copy', saveAs: { filename -> if (filename.endsWith(".igv.txt")) null else filename @@ -1327,7 +1325,6 @@ process ConsensusPeakSetDESeq { script: prefix = "${antibody}.consensus_peaks" - peaktype = params.narrow_peak ? "narrowPeak" : "broadPeak" bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() bam_ext = params.single_end ? ".mLb.clN.sorted.bam" : ".mLb.clN.bam" pe_params = params.single_end ? '' : "-p --donotsort" @@ -1352,7 +1349,7 @@ process ConsensusPeakSetDESeq { sed -i -e 's/DESeq2:/${antibody} DESeq2:/g' tmp.txt cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv - find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${peaktype}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt + find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt """ } @@ -1368,7 +1365,7 @@ process ConsensusPeakSetDESeq { * STEP 8 - Create IGV session file */ process IGV { - publishDir "${params.outdir}/igv/${peaktype}", mode: 'copy' + publishDir "${params.outdir}/igv/${PEAK_TYPE}", mode: 'copy' when: !params.skip_igv @@ -1384,7 +1381,6 @@ process IGV { file "*.{txt,xml}" into ch_igv_session script: // scripts are bundled with the pipeline, in nf-core/chipseq/bin/ - peaktype = params.narrow_peak ? 
"narrowPeak" : "broadPeak" """ cat *.txt > igv_files.txt igv_files_to_session.py igv_session.xml igv_files.txt ../../reference_genome/${fasta.getName()} --path_prefix '../../' @@ -1458,7 +1454,7 @@ ${summary.collect { k,v -> "
$k
${v ?: ' Date: Fri, 18 Oct 2019 16:13:24 +0100 Subject: [PATCH 073/113] Add tool citations --- README.md | 2 + docs/citations.md | 94 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 docs/citations.md diff --git a/README.md b/README.md index db6bca9a..e33561c0 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,8 @@ For further information or help, don't hesitate to get in touch on [Slack](https ## Citation +A list of references for the tools used in the pipeline can be found in the [citations](docs/citations.md) file. + If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) You can cite the `nf-core` pre-print as follows: diff --git a/docs/citations.md b/docs/citations.md new file mode 100644 index 00000000..37048a7b --- /dev/null +++ b/docs/citations.md @@ -0,0 +1,94 @@ +# Pipeline tools + +[BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) +> Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + +[BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) +> Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +[SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) +> Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. 
+ +[BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) +> Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. + +[UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) +> Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + +[preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) +> Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + +[deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) +> Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. + +[MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) +> Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + +[HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) +> Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. 
Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. + +[phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) +> Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. + +[featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) +> Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + +[DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) +> Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + +[MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) +> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
+ +[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +[Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +[picard-tools](http://broadinstitute.github.io/picard) + +[pysam](https://github.com/pysam-developers/pysam) + +# R packages + +[R](https://www.R-project.org/) +> R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +[optparse](https://CRAN.R-project.org/package=optparse) +> Trevor L Davis (2018). optparse: Command Line Option Parser. + +[RColorBrewer]: https://CRAN.R-project.org/package=RColorBrewer +> Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + +[ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) +> H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + +[reshape2](http://www.jstatsoft.org/v21/i12/) +> Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. + +[scales](https://CRAN.R-project.org/package=scales) +> Hadley Wickham (2018). scales: Scale Functions for Visualization. + +[pheatmap](https://CRAN.R-project.org/package=pheatmap) +> Raivo Kolde (2018). pheatmap: Pretty Heatmaps. + +[lattice](https://cran.r-project.org/web/packages/lattice/index.html) +> Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. + +[vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) +> Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). + +[UpSetR](https://CRAN.R-project.org/package=UpSetR) +> Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. 
+ +[xfun](https://CRAN.R-project.org/package=xfun) +> Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. + +# Infrastructure tools + +[Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) +> Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +[Anaconda](https://anaconda.com) +> Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +[Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) +> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. From 7f90664fd6021f7b250f64fd0f422e43d55af644 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 23:46:06 +0100 Subject: [PATCH 074/113] Remove TODO string --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index 1413bfb2..756c5420 100755 --- a/main.nf +++ b/main.nf @@ -1561,7 +1561,6 @@ workflow.onComplete { email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - // TODO nf-core: If not using MultiQC, strip out this code (including params.max_multiqc_email_size) // On success try attach the multiqc report def mqc_report = null try { From 5c1b6ddb7351c675ec281c9fd84a1208a4581e88 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 23:46:13 +0100 Subject: [PATCH 075/113] Fix markdown --- docs/citations.md | 107 +++++++++++++++++++++++++++++----------------- 1 file changed, 68 insertions(+), 39 deletions(-) diff --git a/docs/citations.md b/docs/citations.md index 37048a7b..832cb39b 100644 --- a/docs/citations.md 
+++ b/docs/citations.md @@ -1,94 +1,123 @@ -# Pipeline tools +# Citations + +## Pipeline tools + +* [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) -[BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. -[BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) +* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. -[SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) +* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. -[BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) +* [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) + > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. -[UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) +* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. 
doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. -[preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) +* [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. -[deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) +* [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) + > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. -[MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) +* [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. -[HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) +* [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) + > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. 
-[phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) +* [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) + > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. -[featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) +* [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. -[DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) -> Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. +* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) -[MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
-[FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) +* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -[Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) +* [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) -[picard-tools](http://broadinstitute.github.io/picard) +* [picard-tools](http://broadinstitute.github.io/picard) -[pysam](https://github.com/pysam-developers/pysam) +* [pysam](https://github.com/pysam-developers/pysam) -# R packages +## R packages + +* [R](https://www.R-project.org/) -[R](https://www.R-project.org/) > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. -[optparse](https://CRAN.R-project.org/package=optparse) -> Trevor L Davis (2018). optparse: Command Line Option Parser. +* [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) -[RColorBrewer]: https://CRAN.R-project.org/package=RColorBrewer -> Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. +> Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + +* [vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) + +> Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). + +* [UpSetR](https://CRAN.R-project.org/package=UpSetR) + +> Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. + +* [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) -[ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. 
Springer-Verlag New York, 2016. -[reshape2](http://www.jstatsoft.org/v21/i12/) +* [reshape2](http://www.jstatsoft.org/v21/i12/) + > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. -[scales](https://CRAN.R-project.org/package=scales) +* [scales](https://CRAN.R-project.org/package=scales) + > Hadley Wickham (2018). scales: Scale Functions for Visualization. -[pheatmap](https://CRAN.R-project.org/package=pheatmap) +* [pheatmap](https://CRAN.R-project.org/package=pheatmap) + > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. -[lattice](https://cran.r-project.org/web/packages/lattice/index.html) +* [lattice](https://cran.r-project.org/web/packages/lattice/index.html) + > Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. -[vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) -> Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). +* [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) -[UpSetR](https://CRAN.R-project.org/package=UpSetR) -> Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. +> Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + +* [optparse](https://CRAN.R-project.org/package=optparse) + +> Trevor L Davis (2018). optparse: Command Line Option Parser. + +* [xfun](https://CRAN.R-project.org/package=xfun) -[xfun](https://CRAN.R-project.org/package=xfun) > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. 
-# Infrastructure tools +## Infrastructure tools + +* [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) -[Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. -[Anaconda](https://anaconda.com) +* [Anaconda](https://anaconda.com) + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. -[Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) +* [Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. From 8009eea76899585a411edc6be31b111a2841d7ba Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 23:50:33 +0100 Subject: [PATCH 076/113] Fix indents --- docs/citations.md | 81 ++++++++++++++++------------------------------- 1 file changed, 27 insertions(+), 54 deletions(-) diff --git a/docs/citations.md b/docs/citations.md index 832cb39b..6db995ba 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -3,52 +3,40 @@ ## Pipeline tools * [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) - -> Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. 
* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - -> Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. * [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - -> Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. * [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) - -> Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. + > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. 
* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) - -> Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. * [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) - -> Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. * [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) - -> Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. + > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. * [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) - -> Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. 
Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. * [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) - -> Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. + > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. * [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) - -> Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. 
PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. + > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. * [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) - -> Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. * [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - -> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) @@ -61,63 +49,48 @@ ## R packages * [R](https://www.R-project.org/) - -> R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. * [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) - -> Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + > Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. * [vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) - -> Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). + > Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). * [UpSetR](https://CRAN.R-project.org/package=UpSetR) - -> Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. + > Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. * [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) - -> H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. 
Springer-Verlag New York, 2016. * [reshape2](http://www.jstatsoft.org/v21/i12/) - -> Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. + > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. * [scales](https://CRAN.R-project.org/package=scales) - -> Hadley Wickham (2018). scales: Scale Functions for Visualization. + > Hadley Wickham (2018). scales: Scale Functions for Visualization. * [pheatmap](https://CRAN.R-project.org/package=pheatmap) - -> Raivo Kolde (2018). pheatmap: Pretty Heatmaps. + > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. * [lattice](https://cran.r-project.org/web/packages/lattice/index.html) - -> Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. + > Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. * [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) - -> Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. * [optparse](https://CRAN.R-project.org/package=optparse) - -> Trevor L Davis (2018). optparse: Command Line Option Parser. + > Trevor L Davis (2018). optparse: Command Line Option Parser. * [xfun](https://CRAN.R-project.org/package=xfun) - -> Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. + > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. ## Infrastructure tools * [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) - -> Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. 
+ > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. * [Anaconda](https://anaconda.com) - -> Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. * [Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) - -> Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. From 3a928d4feeca2619786d43935a2f4a8eaa5ffd99 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 23:55:38 +0100 Subject: [PATCH 077/113] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e33561c0..428275ba 100644 --- a/README.md +++ b/README.md @@ -92,9 +92,9 @@ For further information or help, don't hesitate to get in touch on [Slack](https ## Citation -A list of references for the tools used in the pipeline can be found in the [citations](docs/citations.md) file. - If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) You can cite the `nf-core` pre-print as follows: > Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 
2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). + +A list of references for the tools used in the pipeline can be found in the [citations](docs/citations.md) file. From 55ac73ea31a8b88b46e5624c0172b13a7f23099f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 18 Oct 2019 23:55:46 +0100 Subject: [PATCH 078/113] Add pipeline title --- docs/citations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/citations.md b/docs/citations.md index 6db995ba..288c456b 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -1,4 +1,4 @@ -# Citations +# nf-core/chipseq: Citations ## Pipeline tools From e01110606613f5b07d52061fe9c0d7a145ac60d4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 00:22:16 +0100 Subject: [PATCH 079/113] Update default text --- docs/usage.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 61a1a214..ed77216b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -225,14 +225,10 @@ It is not possible to run a mixture of single-end and paired-end files in one ru Sequencing center information that will be added to read groups in BAM files. ### `--fragment_size` -Number of base pairs to extend single-end reads when creating bigWig files. - -Default: `200` +Number of base pairs to extend single-end reads when creating bigWig files (Default: `200`). ### `--fingerprint_bins` -Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run. - -Default: `500000` +Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run (Default: `500000`). ## Reference genomes @@ -362,7 +358,7 @@ By default, intermediate BAM files will not be saved. The final BAM files create MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. 
Specify this flag to call peaks in narrowPeak mode. ### `--broad_cutoff` -Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isnt specified. Default: 0.1 +Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isn't specified (Default: `0.1`). ### `--min_reps_consensus` Number of biological replicates required from a given condition for a peak to contribute to a consensus peak . If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a "reproducible" set of consensus of peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded. @@ -425,7 +421,7 @@ Set this parameter to your e-mail address to get a summary e-mail with details o This works exactly as with `--email`, except emails are only sent if the workflow is not successful. ### `--max_multiqc_email_size` -Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB). +Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: `25MB`). ### `-name` Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic.
From cee28f5cc391a1f44188653a43082a394810acf1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 00:34:30 +0100 Subject: [PATCH 080/113] Update when statement --- main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.nf b/main.nf index 756c5420..edf83f82 100755 --- a/main.nf +++ b/main.nf @@ -1306,7 +1306,7 @@ process ConsensusPeakSetDESeq { } when: - params.macs_gsize && !params.skip_diff_analysis && replicatesExist && multipleGroups + params.macs_gsize && replicatesExist && multipleGroups && !params.skip_diff_analysis input: set val(antibody), val(replicatesExist), val(multipleGroups), file(bams) ,file(saf) from ch_group_bam_deseq From 08a9b6a761fddb448e69d904dca920f8245c9f08 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 00:55:47 +0100 Subject: [PATCH 081/113] Add Docker URL --- docs/citations.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/citations.md b/docs/citations.md index 288c456b..a6ff30d2 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -84,7 +84,7 @@ * [xfun](https://CRAN.R-project.org/package=xfun) > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. -## Infrastructure tools +## Software packaging/container tools * [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. @@ -94,3 +94,5 @@ * [Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
+ +* [Docker](https://www.docker.com/) From 37179c4994fdc2f04f5e51462d749cf4342ae0c6 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 00:59:32 +0100 Subject: [PATCH 082/113] Correct spelling --- docs/citations.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/citations.md b/docs/citations.md index a6ff30d2..310922d4 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -84,7 +84,7 @@ * [xfun](https://CRAN.R-project.org/package=xfun) > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. -## Software packaging/container tools +## Software packaging/containerisation tools * [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. From 71104619ddcab33e04781484310ca68fa807dc76 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 01:13:31 +0100 Subject: [PATCH 083/113] Add Nextflow citation --- docs/citations.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/citations.md b/docs/citations.md index 310922d4..3a7e6242 100644 --- a/docs/citations.md +++ b/docs/citations.md @@ -2,6 +2,9 @@ ## Pipeline tools +* [Nextflow](https://www.ncbi.nlm.nih.gov/pubmed/28398311/) + > Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + * [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. 
From c73721ab760c9c4b024f88ce4b31a55e9401aade Mon Sep 17 00:00:00 2001 From: drpatelh Date: Sat, 19 Oct 2019 11:30:06 +0100 Subject: [PATCH 084/113] Renamed citation file --- README.md | 3 ++- docs/{citations.md => citation.md} | 0 2 files changed, 2 insertions(+), 1 deletion(-) rename docs/{citations.md => citation.md} (100%) diff --git a/README.md b/README.md index 428275ba..62fe4789 100644 --- a/README.md +++ b/README.md @@ -97,4 +97,5 @@ If you use nf-core/chipseq for your analysis, please cite it using the following You can cite the `nf-core` pre-print as follows: > Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). -A list of references for the tools used in the pipeline can be found in the [citations](docs/citations.md) file. +An extensive list of references for the tools used by the pipeline can be found in the [citation](docs/citation.md) file. + diff --git a/docs/citations.md b/docs/citation.md similarity index 100% rename from docs/citations.md rename to docs/citation.md From 8138759fcc2e5dfe95f857ffddd2a15f595ce803 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 21 Oct 2019 10:21:46 +0100 Subject: [PATCH 085/113] Rename citations file --- docs/citation.md => CITATIONS.md | 0 README.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/citation.md => CITATIONS.md (100%) diff --git a/docs/citation.md b/CITATIONS.md similarity index 100% rename from docs/citation.md rename to CITATIONS.md diff --git a/README.md b/README.md index 62fe4789..1c9f1178 100644 --- a/README.md +++ b/README.md @@ -97,5 +97,5 @@ If you use nf-core/chipseq for your analysis, please cite it using the following You can cite the `nf-core` pre-print as follows: > Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. 
**nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). -An extensive list of references for the tools used by the pipeline can be found in the [citation](docs/citation.md) file. +An extensive list of references for the tools used by the pipeline can be found in the [citation](CITATIONS.md) file. From 76c3d4bc85a8c90d3f0e16545c20e9dc415f7426 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 21 Oct 2019 11:06:49 +0100 Subject: [PATCH 086/113] Update README.md Co-Authored-By: Phil Ewels --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1c9f1178..eca59ed6 100644 --- a/README.md +++ b/README.md @@ -97,5 +97,5 @@ If you use nf-core/chipseq for your analysis, please cite it using the following You can cite the `nf-core` pre-print as follows: > Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). -An extensive list of references for the tools used by the pipeline can be found in the [citation](CITATIONS.md) file. +An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. From a6351cb573540ed767b2ab6fa06ab53c66edfea8 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 21 Oct 2019 11:07:33 +0100 Subject: [PATCH 087/113] Update CHANGELOG.md Co-Authored-By: Phil Ewels --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b58b332..062cab34 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* Add quick start information to main README * Update template to tools `1.7` * Add `--trim_nextseq` parameter +* Added `CITATIONS.md` file ### `Fixed` From 571f3f774037f944fb8352371de106cea306aab0 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 25 Oct 2019 16:46:50 +0100 Subject: [PATCH 088/113] Update to new markdownlint checks --- .github/markdownlint.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index e052a635..96b12a70 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -1,9 +1,5 @@ # Markdownlint configuration file default: true, line-length: false -no-multiple-blanks: 0 -blanks-around-headers: false -blanks-around-lists: false -header-increment: false no-duplicate-header: siblings_only: true From 5268466fb0323f5575eb40b215675bf1ed976050 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 25 Oct 2019 16:46:53 +0100 Subject: [PATCH 089/113] Update to new markdownlint checks --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index eca59ed6..16bcee26 100644 --- a/README.md +++ b/README.md @@ -54,13 +54,15 @@ ii. Install one of [`docker`](https://docs.docker.com/engine/installation/), [`s iii. Download the pipeline and test it on a minimal dataset with a single command ```bash -nextflow run nf-core/chipseq -profile test, +nextflow run nf-core/chipseq -profile test, ``` +> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile institute` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + iv. Start running your own analysis! 
```bash -nextflow run nf-core/chipseq -profile --input design.csv --genome GRCh37 +nextflow run nf-core/chipseq -profile --input design.csv --genome GRCh37 ``` See [usage docs](docs/usage.md) for all of the available options when running the pipeline. @@ -98,4 +100,3 @@ You can cite the `nf-core` pre-print as follows: > Ewels PA, Peltzer A, Fillinger S, Alneberg JA, Patel H, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. **nf-core: Community curated bioinformatics pipelines**. *bioRxiv*. 2019. p. 610741. [doi: 10.1101/610741](https://www.biorxiv.org/content/10.1101/610741v1). An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. - From b78950f5a645ec4c87f10cb5450afe06d0636c8f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 25 Oct 2019 16:46:56 +0100 Subject: [PATCH 090/113] Update to new markdownlint checks --- docs/output.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/output.md b/docs/output.md index ba3db0cb..b797b869 100644 --- a/docs/output.md +++ b/docs/output.md @@ -3,6 +3,7 @@ This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. ## Pipeline overview + The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main README.md`](../README.md) for a condensed overview of the steps in the pipeline, and the bioinformatics tools used at each step. See [Illumina website](https://emea.illumina.com/techniques/sequencing/dna-sequencing/chip-seq.html) for more information regarding the ChIP-seq protocol, and for an extensive list of publications. 
From 7e80057be54f397bef25a65df8726939cf95203f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 25 Oct 2019 16:47:00 +0100 Subject: [PATCH 091/113] Update to new markdownlint checks --- docs/usage.md | 60 +++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index ed77216b..76f91076 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -66,8 +66,8 @@ * [`--multiqc_config`](#--multiqc_config) - ## Introduction + Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. It is recommended to limit the Nextflow Java virtual machines memory. We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): @@ -77,6 +77,7 @@ NXF_OPTS='-Xms1g -Xmx4g' ``` ## Running the pipeline + The typical command for running the pipeline is as follows: ```bash @@ -95,6 +96,7 @@ results # Finished results (configurable, see below) ``` ### Updating the pipeline + When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash @@ -102,16 +104,17 @@ nextflow pull nf-core/chipseq ``` ### Reproducibility + It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. 
If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. - ## Main arguments ### `-profile` + Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. Note that multiple profiles can be loaded, for example: `-profile docker` - the order of arguments is important! If `-profile` is not specified at all the pipeline will be run locally and expects all software to be installed and available on the `PATH`. @@ -132,6 +135,7 @@ If `-profile` is not specified at all the pipeline will be run locally and expec * Includes links to test data so needs no other parameters ### `--input` + You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. ```bash @@ -217,17 +221,21 @@ Example design files have been provided with the pipeline for [paired-end](../as ## Generic arguments ### `--single_end` + By default, the pipeline expects paired-end data. If you have single-end data, specify `--single_end` on the command line when you launch the pipeline. It is not possible to run a mixture of single-end and paired-end files in one run. ### `--seq_center` + Sequencing center information that will be added to read groups in BAM files. ### `--fragment_size` + Number of base pairs to extend single-end reads when creating bigWig files (Default: `200`). 
### `--fingerprint_bins` + Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run (Default: `500000`). ## Reference genomes @@ -235,6 +243,7 @@ Number of genomic bins to use when generating the deepTools fingerprint plot. La The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. ### `--genome` (using iGenomes) + There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: @@ -266,6 +275,7 @@ params { ``` ### `--fasta` + Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs. ```bash @@ -273,6 +283,7 @@ Full path to fasta file containing reference genome (*mandatory* if `--genome` i ``` ### `--gtf` + The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not specified). Note that the GTF file should resemble the Ensembl format. ```bash @@ -280,6 +291,7 @@ The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not ``` ### `--bwa_index` + Full path to an existing BWA index for your reference genome including the base name for the index. ```bash @@ -287,6 +299,7 @@ Full path to an existing BWA index for your reference genome including the base ``` ### `--gene_bed` + The full path to BED file for genome-wide gene intervals. This will be created from the GTF file if not specified. 
```bash @@ -294,6 +307,7 @@ The full path to BED file for genome-wide gene intervals. This will be created f ``` ### `--tss_bed` + The full path to BED file for genome-wide transcription start sites. This will be created from the gene BED file if not specified. ```bash @@ -301,6 +315,7 @@ The full path to BED file for genome-wide transcription start sites. This will b ``` ### `--macs_gsize` + [Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. These have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped. ```bash @@ -308,6 +323,7 @@ The full path to BED file for genome-wide transcription start sites. This will b ``` ### `--blacklist` + If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter. ```bash @@ -315,12 +331,15 @@ If provided, alignments that overlap with the regions in this file will be filte ``` ### `--save_reference` + If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times. ### `--igenomes_ignore` + Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. 
## Adapter trimming + The pipeline accepts a number of parameters to change how the trimming is done, according to your data type. You can specify custom trimming parameters as follows: @@ -336,31 +355,39 @@ You can specify custom trimming parameters as follows: * This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. ### `--skip_trimming` + Skip the adapter trimming step. Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data. ### `--save_trimmed` + By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete. ## Alignments -narrow_peak + ### `--keep_dups` + Duplicate reads are not filtered from alignments. ### `--keep_multi_map` + Reads mapping to multiple locations in the genome are not filtered from alignments. ### `--save_align_intermeds` + By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set to true to also save other intermediate BAM files. ## Peaks ### `--narrow_peak` + MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode. ### `--broad_cutoff` + Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isnt specified (Default: `0.1`). ### `--min_reps_consensus` + Number of biological replicates required from a given condition for a peak to contribute to a consensus peak . 
If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a "reproducible" set of consensus of peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded. ```bash @@ -368,9 +395,11 @@ Number of biological replicates required from a given condition for a peak to co ``` ### `--save_macs_pileup` + Instruct MACS2 to create bedGraph files using the `-B --SPMR` parameters. ### `--skip_diff_analysis` + Skip read counting and differential analysis step. ## Skipping QC steps @@ -390,10 +419,13 @@ The following options make this easy: | `--skip_multiqc` | Skip MultiQC | ## Job resources + ### Automatic resubmission + Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. ### Custom resource requests + Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). 
You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. @@ -401,10 +433,15 @@ If you are likely to be running `nf-core` pipelines regularly it may be a good i If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack/). ## AWS Batch specific parameters + Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use the `-awsbatch` profile and then specify all of the following parameters. + ### `--awsqueue` + The JobQueue that you intend to use on AWS Batch. + ### `--awsregion` + The AWS region to run your job in. Default is set to `eu-west-1` but can be adjusted to your needs. Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. @@ -412,18 +449,23 @@ Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a ## Other command line parameters ### `--outdir` + The output directory where the results will be saved. ### `--email` + Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. ### `--email_on_fail` + This works exactly as with `--email`, except emails are only sent if the workflow is not successful. ### `--max_multiqc_email_size` -Theshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: `25MB`). 
+ +Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: `25MB`). ### `-name` + Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). @@ -431,6 +473,7 @@ This is used in the MultiQC report (if not default) and in the summary HTML / e- **NB:** Single hyphen (core Nextflow option) ### `-resume` + Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -438,6 +481,7 @@ You can also supply a run name to resume a specific run: `-resume [run-name]`. U **NB:** Single hyphen (core Nextflow option) ### `-c` + Specify the path to a specific config file (this is a core NextFlow command). **NB:** Single hyphen (core Nextflow option) @@ -445,6 +489,7 @@ Specify the path to a specific config file (this is a core NextFlow command). Note - you can use this to override pipeline defaults. ### `--custom_config_version` + Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default is set to `master`. ```bash @@ -453,6 +498,7 @@ Provide git commit id for custom Institutional configs hosted at `nf-core/config ``` ### `--custom_config_base` + If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell nextflow where to find them with the @@ -473,22 +519,28 @@ nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs > files + singularity containers + institutional configs in one go for you, to make this process easier. ### `--max_memory` + Use to set a top-limit for the default memory requirement for each process. Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` ### `--max_time` + Use to set a top-limit for the default time requirement for each process. Should be a string in the format integer-unit. eg. `--max_time '2.h'` ### `--max_cpus` + Use to set a top-limit for the default CPU requirement for each process. Should be a string in the format integer-unit. eg. `--max_cpus 1` ### `--plaintext_email` + Set to receive plain-text e-mails instead of HTML formatted. ### `--monochrome_logs` + Set to disable colourful command line output and live life in monochrome. ### `--multiqc_config` + Specify a path to a custom MultiQC configuration file. From fe87f285d4fa122f630c0dfcac4f7aa53bd4ac3f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 29 Oct 2019 13:16:19 +0000 Subject: [PATCH 092/113] Change travis-ci.org to travis-ci.com --- .github/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 2662b570..83bf5a89 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -26,7 +26,7 @@ If you're not used to this workflow with git, you can start with some [basic doc ## Tests -When you create a pull request with changes, [Travis CI](https://travis-ci.org/) will run automatic tests. +When you create a pull request with changes, [Travis CI](https://travis-ci.com/) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then. 
There are typically two types of tests that run: From 38c1b9a2b19cb826508bf2b1ce42f26b3e14b544 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 29 Oct 2019 13:16:22 +0000 Subject: [PATCH 093/113] Change travis-ci.org to travis-ci.com --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 16bcee26..c81f8263 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) -[![Build Status](https://travis-ci.org/nf-core/chipseq.svg?branch=master)](https://travis-ci.org/nf-core/chipseq) +[![Build Status](https://travis-ci.com/nf-core/chipseq.svg?branch=master)](https://travis-ci.com/nf-core/chipseq) [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) From 826af16101e750f6297cd46148e782dfbb0932fb Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 30 Oct 2019 15:54:56 +0000 Subject: [PATCH 094/113] Bump version to 1.1.0 --- .travis.yml | 2 +- Dockerfile | 4 ++-- environment.yml | 2 +- nextflow.config | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index ae8b1235..92a07bb7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,7 @@ before_install: - docker pull nfcore/chipseq:dev # Fake the tag locally so that the pipeline runs properly # Looks weird when this is :dev to :dev, but makes sense when testing code for a release (:dev to :1.0.1) - - docker tag nfcore/chipseq:dev nfcore/chipseq:dev + - docker tag nfcore/chipseq:dev nfcore/chipseq:1.1.0 install: # Install Nextflow diff --git a/Dockerfile b/Dockerfile index 16b864c9..bdde32f4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,5 +4,5 @@ LABEL authors="Philip Ewels" \ COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -RUN conda env export --name 
nf-core-chipseq-1.0.1dev > nf-core-chipseq-1.0.1dev.yml -ENV PATH /opt/conda/envs/nf-core-chipseq-1.0.1dev/bin:$PATH +RUN conda env export --name nf-core-chipseq-1.1.0 > nf-core-chipseq-1.1.0.yml +ENV PATH /opt/conda/envs/nf-core-chipseq-1.1.0/bin:$PATH diff --git a/environment.yml b/environment.yml index 357755c7..40bd3199 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-chipseq-1.0.1dev +name: nf-core-chipseq-1.1.0 channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index 146e1abc..9d285a6a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -85,7 +85,7 @@ params { // Container slug. Stable releases should specify release tag! // Developmental code should specify :dev -process.container = 'nfcore/chipseq:dev' +process.container = 'nfcore/chipseq:1.1.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -147,7 +147,7 @@ manifest { description = 'ChIP-seq peak-calling and differential analysis pipeline.' 
mainScript = 'main.nf' nextflowVersion = '>=19.04.0' - version = '1.0.1dev' + version = '1.1.0' } // Function to ensure that resource requirements don't go beyond From ad6ccc1b8e2b0e7c5879f68df7453a1ad57be957 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 30 Oct 2019 15:55:05 +0000 Subject: [PATCH 095/113] Fix spacing --- main.nf | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index edf83f82..62d81b79 100755 --- a/main.nf +++ b/main.nf @@ -1535,7 +1535,7 @@ workflow.onComplete { // Set up the e-mail variables def subject = "[nf-core/chipseq] Successful: $workflow.runName" if (!workflow.success) { - subject = "[nf-core/chipseq] FAILED: $workflow.runName" + subject = "[nf-core/chipseq] FAILED: $workflow.runName" } def email_fields = [:] email_fields['version'] = workflow.manifest.version @@ -1601,21 +1601,21 @@ workflow.onComplete { // Send the HTML e-mail if (email_address) { try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)" + if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } + // Try to send HTML e-mail using sendmail + [ 'sendmail', '-t' ].execute() << sendmail_html + log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)" } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, email_address ].execute() << email_txt - log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (mail)" + // Catch failures and try with plaintext + [ 'mail', '-s', subject, email_address ].execute() << email_txt + log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (mail)" } } // Write summary e-mail HTML to a file def output_d = new File("${params.outdir}/pipeline_info/") if 
(!output_d.exists()) { - output_d.mkdirs() + output_d.mkdirs() } def output_hf = new File(output_d, "pipeline_report.html") output_hf.withWriter { w -> w << email_html } @@ -1628,9 +1628,9 @@ workflow.onComplete { c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" - log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" - log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" + log.info "${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}" + log.info "${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}" + log.info "${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}" } if (workflow.success) { From 62af7b8e202407145dc5d7e67eb5ee10a5e9a761 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 30 Oct 2019 15:55:12 +0000 Subject: [PATCH 096/113] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 062cab34..34bbe545 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). 
-## [Unpublished Version / DEV] +## [1.1.0] - 2019-11-01 ### `Added` From c30341969170e66cbe0141b7fc8589d1950a4a30 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 30 Oct 2019 16:49:20 +0000 Subject: [PATCH 097/113] Disable ANSI in Travis --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 92a07bb7..b641d3f0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -39,4 +39,4 @@ script: # Lint the documentation - markdownlint ${TRAVIS_BUILD_DIR} -c ${TRAVIS_BUILD_DIR}/.github/markdownlint.yml # Run the pipeline with the test profile - - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker + - nextflow run ${TRAVIS_BUILD_DIR} -profile test,docker -ansi-log false From edf3b7ec1060f0d3a4140739b2babaa23fea4696 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 13:33:47 +0000 Subject: [PATCH 098/113] Move params section to nextflow.config --- conf/base.config | 8 -------- 1 file changed, 8 deletions(-) diff --git a/conf/base.config b/conf/base.config index 12f1cff4..722a01ad 100644 --- a/conf/base.config +++ b/conf/base.config @@ -43,11 +43,3 @@ process { } } - -params { - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h - igenomes_base = 's3://ngi-igenomes/igenomes/' -} From 8f99518de27c38b0cd5851f53ef7150dde8b61b9 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 13:33:52 +0000 Subject: [PATCH 099/113] Move params section to nextflow.config --- nextflow.config | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 9d285a6a..ab4f2950 100644 --- a/nextflow.config +++ b/nextflow.config @@ -69,7 +69,7 @@ params { // Options: Other help = false outdir = './results' - igenomes_base = "./iGenomes" + igenomes_base = 's3://ngi-igenomes/igenomes/' igenomes_ignore = false max_multiqc_email_size = 25.MB tracedir = "${params.outdir}/pipeline_info" @@ -81,6 +81,11 @@ params { hostnames = false 
clusterOptions = false + // Defaults only, expecting to be overwritten + max_memory = 128.GB + max_cpus = 16 + max_time = 240.h + } // Container slug. Stable releases should specify release tag! From d8901d06fdd1507a2191749ace2c158c38e41186 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:09:26 +0000 Subject: [PATCH 100/113] Update CHANGELOG --- CHANGELOG.md | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 34bbe545..0a88f38f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,29 +10,48 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` * [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config -* Capitalised process names -* Add quick start information to main README * Update template to tools `1.7` * Add `--trim_nextseq` parameter * Added `CITATIONS.md` file +* Capitalised process names ### `Fixed` +* **Change all parameters from `camelCase` to `snake_case`** * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 * [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? 
* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures -* Change parameter `saveGenomeIndex` to `save_reference` -* Change parameter `--design` to `--input` -* Change all parameters from `camelCase` to `snake_case` * Fixed bug in UpSetR peak intersection plot ### `Dependencies` * Bump Nextflow version to `19.04.0` +### `Deprecated` + +* `--design` -> `--input` +* `--singleEnd` -> `--single_end` +* `--saveGenomeIndex` -> `--save_reference` +* `--skipTrimming` -> `--skip_trimming` +* `--saveTrimmed` -> `--save_trimmed` +* `--keepDups` -> `--keep_dups` +* `--keepMultiMap` -> `--keep_multi_map` +* `--saveAlignedIntermediates` -> `--save_align_intermeds` +* `--narrowPeak` -> `--narrow_peak` +* `--saveMACSPileup` -> `--save_macs_pileup` +* `--skipDiffAnalysis` -> `--skip_diff_analysis` +* `--skipFastQC` -> `--skip_fastqc` +* `--skipPicardMetrics` -> `--skip_picard_metrics`` +* `--skipPreseq` -> `--skip_preseq` +* `--skipPlotProfile` -> `--skip_plot_profile` +* `--skipPlotFingerprint` -> `--skip_plot_fingerprint` +* `--skipSpp` -> `--skip_spp` +* `--skipIGV` -> `--skip_igv` +* `--skipMultiQC` -> `--skip_multiqc` + ## [1.0.0] - 2019-06-06 Initial release of nf-core/chipseq pipeline. From 2f5755f23abc0e79d91a2cb8a7a3c0042c6d896e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:13:46 +0000 Subject: [PATCH 101/113] Update CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0a88f38f..566146bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,12 +12,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config * Update template to tools `1.7` * Add `--trim_nextseq` parameter -* Added `CITATIONS.md` file +* Add `CITATIONS.md` file * Capitalised process names ### `Fixed` -* **Change all parameters from `camelCase` to `snake_case`** +* **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated)** * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 * [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? * [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly From d8d8c5067d5d53518a87200659643742fd9d9e09 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:20:49 +0000 Subject: [PATCH 102/113] Add parameters in table --- CHANGELOG.md | 47 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 45 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 566146bc..be71fb59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,14 +17,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Fixed` -* **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated)** +* **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated))** * [#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 * [#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? 
* [#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly * [#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks +* Fixed bug in UpSetR peak intersection plot * Increase default resource requirements in `base.config` * Increase process-specific requirements based on user-reported failures -* Fixed bug in UpSetR peak intersection plot ### `Dependencies` @@ -32,6 +32,49 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Deprecated` +| Deprecated | Added | +|------------------------------|---------------------------| +| `--design` | `--input` | +| `--singleEnd` | `--single_end` | +| `--saveGenomeIndex` | `--save_reference` | +| `--skipTrimming` | `--skip_trimming` | +| `--saveTrimmed` | `--save_trimmed` | +| `--keepDups` | `--keep_dups` | +| `--keepMultiMap` | `--keep_multi_map` | +| `--saveAlignedIntermediates` | `--save_align_intermeds` | +| `--narrowPeak` | `--narrow_peak` | +| `--saveMACSPileup` | `--save_macs_pileup` | +| `--skipDiffAnalysis` | `--skip_diff_analysis` | +| `--skipFastQC` | `--skip_fastqc` | +| `--skipPicardMetrics` | `--skip_picard_metrics` | +| `--skipPreseq` | `--skip_preseq` | +| `--skipPlotProfile` | `--skip_plot_profile` | +| `--skipPlotFingerprint` | `--skip_plot_fingerprint` | +| `--skipSpp` | `--skip_spp` | +| `--skipIGV` | `--skip_igv` | +| `--skipMultiQC` | `--skip_multiqc` | + +* `--design` -> `--input` +* `--singleEnd` -> `--single_end` +* `--saveGenomeIndex` -> `--save_reference` +* `--skipTrimming` -> `--skip_trimming` +* `--saveTrimmed` -> `--save_trimmed` +* `--keepDups` -> `--keep_dups` +* `--keepMultiMap` -> `--keep_multi_map` +* `--saveAlignedIntermediates` -> `--save_align_intermeds` +* `--narrowPeak` -> `--narrow_peak` +* `--saveMACSPileup` -> `--save_macs_pileup` +* `--skipDiffAnalysis` -> `--skip_diff_analysis` +* `--skipFastQC` -> `--skip_fastqc` +* `--skipPicardMetrics` -> 
`--skip_picard_metrics`` +* `--skipPreseq` -> `--skip_preseq` +* `--skipPlotProfile` -> `--skip_plot_profile` +* `--skipPlotFingerprint` -> `--skip_plot_fingerprint` +* `--skipSpp` -> `--skip_spp` +* `--skipIGV` -> `--skip_igv` +* `--skipMultiQC` -> `--skip_multiqc` + + * `--design` -> `--input` * `--singleEnd` -> `--single_end` * `--saveGenomeIndex` -> `--save_reference` From 030b4dd53f05a64221ac08bb004a6bb273988763 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:22:46 +0000 Subject: [PATCH 103/113] Add params table --- CHANGELOG.md | 43 +------------------------------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be71fb59..d692be6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Deprecated` -| Deprecated | Added | +| Deprecated | Replacement | |------------------------------|---------------------------| | `--design` | `--input` | | `--singleEnd` | `--single_end` | @@ -54,47 +54,6 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
| `--skipIGV` | `--skip_igv` | | `--skipMultiQC` | `--skip_multiqc` | -* `--design` -> `--input` -* `--singleEnd` -> `--single_end` -* `--saveGenomeIndex` -> `--save_reference` -* `--skipTrimming` -> `--skip_trimming` -* `--saveTrimmed` -> `--save_trimmed` -* `--keepDups` -> `--keep_dups` -* `--keepMultiMap` -> `--keep_multi_map` -* `--saveAlignedIntermediates` -> `--save_align_intermeds` -* `--narrowPeak` -> `--narrow_peak` -* `--saveMACSPileup` -> `--save_macs_pileup` -* `--skipDiffAnalysis` -> `--skip_diff_analysis` -* `--skipFastQC` -> `--skip_fastqc` -* `--skipPicardMetrics` -> `--skip_picard_metrics`` -* `--skipPreseq` -> `--skip_preseq` -* `--skipPlotProfile` -> `--skip_plot_profile` -* `--skipPlotFingerprint` -> `--skip_plot_fingerprint` -* `--skipSpp` -> `--skip_spp` -* `--skipIGV` -> `--skip_igv` -* `--skipMultiQC` -> `--skip_multiqc` - - -* `--design` -> `--input` -* `--singleEnd` -> `--single_end` -* `--saveGenomeIndex` -> `--save_reference` -* `--skipTrimming` -> `--skip_trimming` -* `--saveTrimmed` -> `--save_trimmed` -* `--keepDups` -> `--keep_dups` -* `--keepMultiMap` -> `--keep_multi_map` -* `--saveAlignedIntermediates` -> `--save_align_intermeds` -* `--narrowPeak` -> `--narrow_peak` -* `--saveMACSPileup` -> `--save_macs_pileup` -* `--skipDiffAnalysis` -> `--skip_diff_analysis` -* `--skipFastQC` -> `--skip_fastqc` -* `--skipPicardMetrics` -> `--skip_picard_metrics`` -* `--skipPreseq` -> `--skip_preseq` -* `--skipPlotProfile` -> `--skip_plot_profile` -* `--skipPlotFingerprint` -> `--skip_plot_fingerprint` -* `--skipSpp` -> `--skip_spp` -* `--skipIGV` -> `--skip_igv` -* `--skipMultiQC` -> `--skip_multiqc` - ## [1.0.0] - 2019-06-06 Initial release of nf-core/chipseq pipeline. 
From ec4d2cc54e01d69452df3e91769220dac64a1d3a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:26:58 +0000 Subject: [PATCH 104/113] Fix spacing --- main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/main.nf b/main.nf index 62d81b79..51b84f3e 100755 --- a/main.nf +++ b/main.nf @@ -256,17 +256,17 @@ summary['Working Dir'] = workflow.workDir summary['Script Dir'] = workflow.projectDir summary['User'] = workflow.userName if (workflow.profile == 'awsbatch') { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue + summary['AWS Region'] = params.awsregion + summary['AWS Queue'] = params.awsqueue } summary['Config Profile'] = workflow.profile if (params.config_profile_description) summary['Config Description'] = params.config_profile_description if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact if (params.config_profile_url) summary['Config URL'] = params.config_profile_url if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC Max Size'] = params.max_multiqc_email_size + summary['E-mail Address'] = params.email + summary['E-mail on failure'] = params.email_on_fail + summary['MultiQC Max Size'] = params.max_multiqc_email_size } log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" From bc8473d57c18201ffe21eabe8daba2bf651e2c2d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:27:15 +0000 Subject: [PATCH 105/113] Change date --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d692be6d..1e9bdf12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. 
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [1.1.0] - 2019-11-01 +## [1.1.0] - 2019-11-04 ### `Added` From 43118e7774b28e5dea625375410cb9cd328ebdb0 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:36:04 +0000 Subject: [PATCH 106/113] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e9bdf12..8ac3b6a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Dependencies` -* Bump Nextflow version to `19.04.0` +* Update `Nextflow 0.32.0` -> `19.04.0` ### `Deprecated` From f95ce93c174648d92e320e54719bcd8a836dc652 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 14:37:35 +0000 Subject: [PATCH 107/113] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8ac3b6a9..e8f89c0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Dependencies` -* Update `Nextflow 0.32.0` -> `19.04.0` +* Update Nextflow `0.32.0` -> `19.04.0` ### `Deprecated` From 51311bcf49fd9e285506e778361a01f912e4559a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 16:53:20 +0000 Subject: [PATCH 108/113] Export PYTHONNOUSERSITE in Dockerfile --- Dockerfile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index bdde32f4..8af03174 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,15 @@ FROM nfcore/base:1.7 LABEL authors="Philip Ewels" \ description="Docker image containing all requirements for nf-core/chipseq pipeline" +# Install the conda environment COPY environment.yml / RUN conda env create -f /environment.yml && conda clean -a -RUN conda env export --name nf-core-chipseq-1.1.0 > nf-core-chipseq-1.1.0.yml + +# Add conda installation dir to PATH (instead of doing 'conda activate') ENV PATH /opt/conda/envs/nf-core-chipseq-1.1.0/bin:$PATH + +# Dump the details of the installed packages to a file for posterity +RUN conda env export --name nf-core-chipseq-1.1.0 > nf-core-chipseq-1.1.0.yml + +# Prevent Python from loading packages from outside the container +ENV PYTHONNOUSERSITE=1 From e9413a062a57d98741c4828695974d9dcf684473 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 18:00:53 +0000 Subject: [PATCH 109/113] Add PYTHONNOUSERSITE to nextflow.config --- nextflow.config | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow.config b/nextflow.config index ab4f2950..36196977 100644 --- a/nextflow.config +++ b/nextflow.config @@ -125,6 +125,11 @@ if (!params.igenomes_ignore) { // Increase time available to build conda environment conda { createTimeout = "60 min" } +// Export this variable to prevent local Python libraries from conflicting with those in the container +env { + PYTHONNOUSERSITE = 1 +} + // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] From 
977e4b6a19cbd1a7648ff7015791a017b5dbefa3 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 18:11:07 +0000 Subject: [PATCH 110/113] Bump Nextflow to 19.10.0 --- .travis.yml | 2 +- README.md | 2 +- nextflow.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index b641d3f0..f86c8abf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -30,7 +30,7 @@ install: - sudo apt-get install npm && npm install -g markdownlint-cli env: - - NXF_VER='19.04.0' # Specify a minimum NF version that should be tested and work + - NXF_VER='19.10.0' # Specify a minimum NF version that should be tested and work - NXF_VER='' # Plus: get the latest NF version and check that it works script: diff --git a/README.md b/README.md index c80f40b1..543f009f 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) [![Build Status](https://travis-ci.com/nf-core/chipseq.svg?branch=master)](https://travis-ci.com/nf-core/chipseq) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.04.0-brightgreen.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) diff --git a/nextflow.config b/nextflow.config index 36196977..c758ab92 100644 --- a/nextflow.config +++ b/nextflow.config @@ -156,7 +156,7 @@ manifest { homePage = 'https://github.com/nf-core/chipseq' description = 'ChIP-seq peak-calling and differential analysis pipeline.' 
mainScript = 'main.nf' - nextflowVersion = '>=19.04.0' + nextflowVersion = '>=19.10.0' version = '1.1.0' } From b9beb149a6805f3682c85a857cb0f5c6fd59728d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 18:15:14 +0000 Subject: [PATCH 111/113] Update CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8f89c0f..768d00a0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Dependencies` -* Update Nextflow `0.32.0` -> `19.04.0` +* Update Nextflow `0.32.0` -> `19.10.0` ### `Deprecated` From 26396bcb9fa246ce462cf7710fb14ab96427e638 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 1 Nov 2019 18:30:36 +0000 Subject: [PATCH 112/113] Remove PYTHONNOUSERSITE from Dockerfile --- Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 8af03174..c43fecf6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,6 +11,3 @@ ENV PATH /opt/conda/envs/nf-core-chipseq-1.1.0/bin:$PATH # Dump the details of the installed packages to a file for posterity RUN conda env export --name nf-core-chipseq-1.1.0 > nf-core-chipseq-1.1.0.yml - -# Prevent Python from loading packages from outside the container -ENV PYTHONNOUSERSITE=1 From 7456784d2c25518cf79665f85b74a0a5e7a50f06 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 4 Nov 2019 11:22:36 +0000 Subject: [PATCH 113/113] Update CHANGELOG date --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 768d00a0..6c2fbe56 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [1.1.0] - 2019-11-04 +## [1.1.0] - 2019-11-05 ### `Added`