From 507773e28d0695c5ade9bf74be46739da7f3a283 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 1 Jun 2020 15:55:24 +0100 Subject: [PATCH 001/129] Bump version --- .github/workflows/ci.yml | 8 ++++---- Dockerfile | 4 ++-- environment.yml | 2 +- nextflow.config | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2f031c0e..5ffc8401 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -22,7 +22,7 @@ jobs: - name: Pull docker image run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.0.0 + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker @@ -44,7 +44,7 @@ jobs: - name: Pull docker image run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.0.0 + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - name: Run pipeline with test amplicon data with various options run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} @@ -66,7 +66,7 @@ jobs: - name: Pull docker image run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.0.0 + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - name: Run pipeline with minimal data via SRA ids and various options run: | nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} @@ -88,7 +88,7 @@ jobs: - name: Pull docker image run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.0.0 + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - name: Run pipeline with minimal SISPA data and various options run: | nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} diff --git a/Dockerfile b/Dockerfile index e2ff3e00..4752f69c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,10 +10,10 @@ 
RUN conda env create -f /environment.yml && conda clean -a RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-viralrecon-1.0.0/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-viralrecon-1.1.0dev/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-viralrecon-1.0.0 > nf-core-viralrecon-1.0.0.yml +RUN conda env export --name nf-core-viralrecon-1.1.0dev > nf-core-viralrecon-1.1.0dev.yml # Instruct R processes to use these empty files instead of clashing with a local version RUN touch .Rprofile diff --git a/environment.yml b/environment.yml index 417f1353..22de35b6 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-viralrecon-1.0.0 +name: nf-core-viralrecon-1.1.0dev channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index b0432401..bb498943 100644 --- a/nextflow.config +++ b/nextflow.config @@ -97,7 +97,7 @@ params { // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev -process.container = 'nfcore/viralrecon:1.0.0' +process.container = 'nfcore/viralrecon:dev' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -172,7 +172,7 @@ manifest { description = 'Assembly and intrahost/low-frequency variant calling for viral samples' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.0.0' + version = '1.1.0dev' } // Function to ensure that resource requirements don't go beyond From 2ab244ce6ca3e062a3d75a36f7555c2f54e7fd30 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 1 Jun 2020 15:56:36 +0100 Subject: [PATCH 002/129] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 734d327b..eb470df9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unpublished Version / DEV] + ## [1.0.0] - 2020-06-01 Initial release of nf-core/viralrecon, created with the [nf-core](http://nf-co.re/) template. From a799de664e1408e543ac3f08ff02ab6254e46a56 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 2 Jun 2020 11:24:18 +0100 Subject: [PATCH 003/129] Update AWS tests --- .github/workflows/awstest.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 5ccf4182..84f27c4d 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -11,6 +11,7 @@ on: jobs: run-awstest: + if: github.repository == 'nf-core/viralrecon' name: Run AWS test runs-on: ubuntu-latest steps: @@ -26,5 +27,10 @@ jobs: AWS_ACCESS_KEY_ID: ${{secrets.AWS_KEY_ID}} AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_KEY_SECRET}} TOWER_ACCESS_TOKEN: ${{secrets.TOWER_ACCESS_TOKEN}} - run: | # Submits job to AWS batch using a 'nextflow-big' instance. 
Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. - aws batch submit-job --region eu-west-1 --job-name nf-core-viralrecon --job-queue 'default-8b3836e0-5eda-11ea-96e5-0a2c3f6a2a32' --job-definition nextflow-4GiB --container-overrides '{"command": ["nf-core/viralrecon", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://nf-core-awsmegatests/viralrecon/results-'"${GITHUB_SHA}"' -w s3://nf-core-awsmegatests/viralrecon/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' + run: | # Submits job to AWS batch using a 'nextflow-4GiB' instance. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. + aws batch submit-job \ + --region eu-west-1 \ + --job-name nf-core-viralrecon \ + --job-queue 'default-8b3836e0-5eda-11ea-96e5-0a2c3f6a2a32' \ + --job-definition nextflow-4GiB \ + --container-overrides '{"command": ["nf-core/viralrecon", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://nf-core-awsmegatests/viralrecon/results-'"${GITHUB_SHA}"' -w s3://nf-core-awsmegatests/viralrecon/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' From adf0de9afcaad44699febc79f08b5213a75c2416 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 5 Jun 2020 14:51:58 +0100 Subject: [PATCH 004/129] Update CHANGELOG --- CHANGELOG.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb470df9..78bec663 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,13 +5,20 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
## [Unpublished Version / DEV] +### `Dependencies` + +* Add r-ggplot2 `3.3.1` +* Add r-tidyr `1.1.0` +* Add r-stringr `1.4.0` +* Add aspera-cli `3.9.1` + ## [1.0.0] - 2020-06-01 Initial release of nf-core/viralrecon, created with the [nf-core](http://nf-co.re/) template. This pipeline is a re-implementation of the [SARS_Cov2_consensus-nf](https://github.com/BU-ISCIII/SARS_Cov2_consensus-nf) and [SARS_Cov2_assembly-nf](https://github.com/BU-ISCIII/SARS_Cov2_assembly-nf) pipelines initially developed by [Sarai Varona](https://github.com/svarona) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII). Porting both of these pipelines to nf-core was an international collaboration between numerous contributors and developers, led by [Harshil Patel](https://github.com/drpatelh) from the [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. We appreciated the need to have a portable, reproducible and scalable pipeline for the analysis of COVID-19 sequencing samples and so the Avengers Assembled! -### Pipeline summary +### `Pipeline summary` 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) 2. 
Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html); *if required*) From d0e3e179d14b95eb8a1b08c3edcea5012e741206 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 5 Jun 2020 14:52:06 +0100 Subject: [PATCH 005/129] Update packages --- environment.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/environment.yml b/environment.yml index 22de35b6..b3892d58 100644 --- a/environment.yml +++ b/environment.yml @@ -15,6 +15,10 @@ dependencies: - conda-forge::pigz=2.3.4 - conda-forge::r-base=3.6.2 - conda-forge::bc=1.07.1 + - conda-forge::r-ggplot2=3.3.1 + - conda-forge::r-tidyr=1.1.0 + - conda-forge::r-stringr=1.4.0 + - hcc::aspera-cli=3.9.1 ## bioconda packages ## common From d2593ccffc2d025f8e613e841123be3aab663b7f Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 11 Jun 2020 21:41:45 +0200 Subject: [PATCH 006/129] Readme tweaks --- README.md | 51 ++++++++++++++++++++++++++++----------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 4c5d43b5..9576f3d7 100644 --- a/README.md +++ b/README.md @@ -3,25 +3,32 @@ [![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions) [![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions) [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3872730.svg)](https://doi.org/10.5281/zenodo.3872730) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/viralrecon.svg)](https://hub.docker.com/r/nfcore/viralrecon) -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3872730.svg)](https://doi.org/10.5281/zenodo.3872730) +[![Get help on 
Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon) ## Introduction -**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intrahost/low-frequency variant calling for viral samples. The pipeline currently supports metagenomics and amplicon sequencing data derived from the Illumina sequencing platform. +**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intrahost/low-frequency variant calling for viral samples. + +The pipeline supports short-read illumina data from both shotgun and enriched library preparation methods (amplicon, target enrichment). -This pipeline is a re-implementation of the [SARS_Cov2_consensus-nf](https://github.com/BU-ISCIII/SARS_Cov2_consensus-nf) and [SARS_Cov2_assembly-nf](https://github.com/BU-ISCIII/SARS_Cov2_assembly-nf) pipelines initially developed by [Sarai Varona](https://github.com/svarona) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII). Porting both of these pipelines to nf-core was an international collaboration between numerous contributors and developers, led by [Harshil Patel](https://github.com/drpatelh) from the [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. We appreciated the need to have a portable, reproducible and scalable pipeline for the analysis of COVID-19 sequencing samples and so the Avengers Assembled! Please come and join us and add yourself to the contributor list :) +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. 
Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. -We have integrated a number of options in the pipeline to allow you to run specific aspects of the workflow if you so wish. For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. See [usage docs](docs/usage.md) for all of the available options when running the pipeline. +## Example output -Please click [here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html) to see an example MultiQC report generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config) to run the pipeline on [samples](https://zenodo.org/record/3735111) which were prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. +The viralrecon pipeline provides analysis on a single target genome of interest, such as a virus. Numerous QC and reporting steps are included, finally finishing with a single [MultiQC](https://multiqc.info/) report containing a full summary of the analysis. -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests to run the pipeline on a full-sized dataset are passing on AWS cloud. +To get a feel for what to expect, you can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). 
The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. ## Pipeline summary +NB: The pipeline has a number of options to allow you to run only specific aspects of the workflow if you so wish. +For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. +See the [usage docs](docs/usage.md) for all of the available options when running the pipeline. + 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) 2. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html); *if required*) 3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) @@ -49,25 +56,23 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool ## Quick Start -i. Install [`nextflow`](https://nf-co.re/usage/installation) - -ii. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility (please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles)) - -iii. Download the pipeline and test it on a minimal dataset with a single command +1. Install [`nextflow`](https://nf-co.re/usage/installation) +2. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +3. 
Download the pipeline and test it on a minimal dataset with a single command: -```bash -nextflow run nf-core/viralrecon -profile test,<docker/singularity/conda/institute> -``` + ```bash + nextflow run nf-core/viralrecon -profile test,<docker/singularity/conda/institute> + ``` -> Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. -iv. Start running your own analysis! +4. Start running your own analysis! -```bash -nextflow run nf-core/viralrecon -profile <docker/singularity/conda/institute> --input samplesheet.csv --genome 'NC_045512.2' -profile docker -``` + ```bash + nextflow run nf-core/viralrecon -profile <docker/singularity/conda/institute> --input samplesheet.csv --genome 'NC_045512.2' -profile docker + ``` -See [usage docs](docs/usage.md) for all of the available options when running the pipeline. +See the [usage documentation](docs/usage.md) for all of the available options when running the pipeline. ## Documentation @@ -86,7 +91,7 @@ The nf-core/viralrecon pipeline comes with documentation about the pipeline, fou These scripts were originally written by [Sarai Varona](https://github.com/svarona), [Miguel Juliá](https://github.com/MiguelJulia) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII) and co-ordinated by Isabel Cuesta for the [Institute of Health Carlos III](https://eng.isciii.es/eng.isciii.es/Paginas/Inicio.html), Spain. 
Through collaboration with the nf-core community the pipeline has now been updated substantially to include additional processing steps, to standardise inputs/outputs and to improve pipeline reporting; implemented primarily by [Harshil Patel](https://github.com/drpatelh) from [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. -Many thanks to others who have helped out and contributed along the way too, including (but not limited to): +Many thanks to others who have helped out and contributed along the way too, including (but not limited to)\*: | Name | Affiliation | |-----------------------------------------------------------|---------------------------------------------------------------------------------------| @@ -107,13 +112,13 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Stephen Kelly](https://github.com/stevekm) | [Memorial Sloan Kettering Cancer Center, USA](https://www.mskcc.org/) | | [Thanh Le Viet](https://github.com/thanhleviet) | [Quadram Institute, UK](https://quadram.ac.uk/) | -> Listed in alphabetical order +> \* Listed in alphabetical order ## Contributions and Support If you would like to contribute to this pipeline, please see the [contributing guidelines](https://github.com/nf-core/viralrecon/blob/master/.github/CONTRIBUTING.md). -For further information or help, don't hesitate to get in touch on [Slack](https://nfcore.slack.com/channels/viralrecon) (you can join with [this invite](https://nf-co.re/join/slack)). +For further information or help, don't hesitate to get in touch on [Slack `#viralrecon` channel](https://nfcore.slack.com/channels/viralrecon) (you can join with [this invite](https://nf-co.re/join/slack)). 
## Citation From 77fa4f89d89e04ad5b84cb6a245401806a9189a1 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 15 Jun 2020 09:48:11 +0200 Subject: [PATCH 007/129] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Harshil Patel Co-authored-by: Sara Monzón --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 9576f3d7..6b6905e8 100644 --- a/README.md +++ b/README.md @@ -11,17 +11,16 @@ ## Introduction -**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intrahost/low-frequency variant calling for viral samples. +**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. -The pipeline supports short-read illumina data from both shotgun and enriched library preparation methods (amplicon, target enrichment). +The pipeline supports short-read Illumina sequencing data from both shotgun (e.g. sequencing directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based). -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. 
Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. -## Example output +## Pipeline reporting -The viralrecon pipeline provides analysis on a single target genome of interest, such as a virus. Numerous QC and reporting steps are included, finally finishing with a single [MultiQC](https://multiqc.info/) report containing a full summary of the analysis. +Numerous QC and reporting steps are included in the pipeline in order to collate a full summary of the analysis within a single [MultiQC](https://multiqc.info/) report. You can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. -To get a feel for what to expect, you can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. 
## Pipeline summary From 2fd8ab97122281236fc86e277db3d4c79adad127 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 15 Jun 2020 09:49:02 +0200 Subject: [PATCH 008/129] Readme - move sentence to after summary --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 6b6905e8..0cef6597 100644 --- a/README.md +++ b/README.md @@ -19,15 +19,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool ## Pipeline reporting -Numerous QC and reporting steps are included in the pipeline in order to collate a full summary of the analysis within a single [MultiQC](https://multiqc.info/) report. You can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. - +Numerous QC and reporting steps are included in the pipeline in order to collate a full summary of the analysis within a single [MultiQC](https://multiqc.info/) report. You can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. 
## Pipeline summary -NB: The pipeline has a number of options to allow you to run only specific aspects of the workflow if you so wish. -For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. -See the [usage docs](docs/usage.md) for all of the available options when running the pipeline. - 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) 2. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html); *if required*) 3. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) @@ -53,6 +48,10 @@ See the [usage docs](docs/usage.md) for all of the available options when runnin * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) 7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) +Note that the pipeline has a number of options to allow you to run only specific aspects of the workflow if you so wish. +For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. +See the [usage docs](docs/usage.md) for all of the available options when running the pipeline. + ## Quick Start 1. Install [`nextflow`](https://nf-co.re/usage/installation) From e1134ed88cec082d6f7373ba96bfc4e0ecc412bc Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Mon, 15 Jun 2020 09:58:43 +0200 Subject: [PATCH 009/129] more linebreaks --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0cef6597..378d08ff 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,9 @@ See the [usage docs](docs/usage.md) for all of the available options when runnin ## Quick Start 1. Install [`nextflow`](https://nf-co.re/usage/installation) + 2. 
Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ + 3. Download the pipeline and test it on a minimal dataset with a single command: ```bash From 0e7c3ae1e1b91dceacc513680bd591086d3aa5b7 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 10:57:48 +0100 Subject: [PATCH 010/129] Revert "Update CHANGELOG" This reverts commit adf0de9afcaad44699febc79f08b5213a75c2416. --- CHANGELOG.md | 9 +-------- environment.yml | 4 ---- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 78bec663..eb470df9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,20 +5,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished Version / DEV] -### `Dependencies` - -* Add r-ggplot2 `3.3.1` -* Add r-tidyr `1.1.0` -* Add r-stringr `1.4.0` -* Add aspera-cli `3.9.1` - ## [1.0.0] - 2020-06-01 Initial release of nf-core/viralrecon, created with the [nf-core](http://nf-co.re/) template. This pipeline is a re-implementation of the [SARS_Cov2_consensus-nf](https://github.com/BU-ISCIII/SARS_Cov2_consensus-nf) and [SARS_Cov2_assembly-nf](https://github.com/BU-ISCIII/SARS_Cov2_assembly-nf) pipelines initially developed by [Sarai Varona](https://github.com/svarona) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII). 
Porting both of these pipelines to nf-core was an international collaboration between numerous contributors and developers, led by [Harshil Patel](https://github.com/drpatelh) from the [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. We appreciated the need to have a portable, reproducible and scalable pipeline for the analysis of COVID-19 sequencing samples and so the Avengers Assembled! -### `Pipeline summary` +### Pipeline summary 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) 2. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html); *if required*) diff --git a/environment.yml b/environment.yml index b3892d58..22de35b6 100644 --- a/environment.yml +++ b/environment.yml @@ -15,10 +15,6 @@ dependencies: - conda-forge::pigz=2.3.4 - conda-forge::r-base=3.6.2 - conda-forge::bc=1.07.1 - - conda-forge::r-ggplot2=3.3.1 - - conda-forge::r-tidyr=1.1.0 - - conda-forge::r-stringr=1.4.0 - - hcc::aspera-cli=3.9.1 ## bioconda packages ## common From f9f65302e95cee22e470888e9aee4867df35d0cd Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 15:49:59 +0100 Subject: [PATCH 011/129] Build Docker image via PR --- .github/workflows/branch.yml | 28 ++++- .github/workflows/ci.yml | 217 +++++++++++++++++++++++++--------- .github/workflows/linting.yml | 13 +- 3 files changed, 199 insertions(+), 59 deletions(-) diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index edd515eb..3b09502e 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -3,14 +3,32 @@ name: nf-core branch protection # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: 
pull_request: - branches: - - master + branches: [master] jobs: test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - # PRs are only ok if coming from an nf-core `dev` branch or a fork `patch` branch + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs + if: github.repository == 'nf-core/viralrecon' run: | - { [[ $(git remote get-url origin) == *nf-core/viralrecon ]] && [[ ${GITHUB_HEAD_REF} = "dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name}} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the ${{github.event.pull_request.head.repo.full_name}} `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5ffc8401..8639a4dc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,94 +1,205 @@ name: nf-core CI -# This workflow is triggered on pushes and PRs to the repository. +# This workflow is triggered on releases and pull-requests. 
# It runs the pipeline with the minimal test dataset to check that it completes without any syntax errors -on: [push, pull_request] +on: + push: + branches: + - dev + pull_request: + release: + types: [published] jobs: test: + name: Run default workflow tests + # Only run on push if this is the nf-core dev branch (merged PRs) + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest env: NXF_VER: ${{ matrix.nxf_ver }} NXF_ANSI_LOG: false - runs-on: ubuntu-latest strategy: matrix: # Nextflow versions: check pipeline minimum and current latest nxf_ver: ['19.10.0', ''] steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . 
-t nfcore/viralrecon:dev + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + - name: Run pipeline with test data run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker parameters: + name: Run workflow tests for multiple parameters + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest env: NXF_VER: '19.10.0' NXF_ANSI_LOG: false - runs-on: ubuntu-latest strategy: matrix: parameters: [--skip_adapter_trimming, --skip_markduplicates, --skip_variants, --skip_amplicon_trimming, --skip_kraken2, --skip_assembly] steps: - - uses: actions/checkout@v2 + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . 
-t nfcore/viralrecon:dev + + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} + run: | + docker pull nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with test amplicon data with various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} + + test_sra: + name: Run workflow tests on SRA downloaded data + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: '19.10.0' + NXF_ANSI_LOG: false + strategy: + matrix: + parameters: [--skip_sra, ''] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . 
-t nfcore/viralrecon:dev + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - - name: Run pipeline with test amplicon data with various options + + - name: Install Nextflow run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ - test_sra: - env: - NXF_VER: '19.10.0' - NXF_ANSI_LOG: false - runs-on: ubuntu-latest - strategy: - matrix: - parameters: [--skip_sra, ''] - steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Pull docker image - run: | - docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - - name: Run pipeline with minimal data via SRA ids and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} + - name: Run pipeline with minimal data via SRA ids and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} test_sispa: - env: - NXF_VER: '19.10.0' - NXF_ANSI_LOG: false - runs-on: ubuntu-latest - strategy: - matrix: - parameters: [--gff false, ''] - steps: - - uses: actions/checkout@v2 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ - - name: Pull docker image - run: | - docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev - - name: Run pipeline with minimal SISPA data and various options - run: | - nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} + name: Run workflow tests for SISPA data + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + runs-on: ubuntu-latest + env: + NXF_VER: '19.10.0' + NXF_ANSI_LOG: false + 
strategy: + matrix: + parameters: [--gff false, ''] + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Check if Dockerfile or Conda environment changed + uses: technote-space/get-diff-action@v1 + with: + PREFIX_FILTER: | + Dockerfile + environment.yml + + - name: Build new docker image + if: env.GIT_DIFF + run: docker build --no-cache . -t nfcore/viralrecon:dev + + - name: Pull docker image + if: ${{ !env.GIT_DIFF }} + run: | + docker pull nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + + - name: Install Nextflow + run: | + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ + + - name: Run pipeline with minimal SISPA data and various options + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,docker ${{ matrix.parameters }} + + push_dockerhub: + name: Push new Docker image to Docker Hub + runs-on: ubuntu-latest + # Only run if the tests passed + needs: test + # Only run for the nf-core repo, for releases and merged PRs + if: ${{ github.repository == 'nf-core/viralrecon' && (github.event_name == 'release' || github.event_name == 'push') }} + env: + DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} + DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + steps: + - name: Check out pipeline code + uses: actions/checkout@v2 + + - name: Build new docker image + run: docker build --no-cache . 
-t nfcore/viralrecon:latest + + - name: Push Docker image to DockerHub (dev) + if: ${{ github.event_name == 'push' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker tag nfcore/viralrecon:latest nfcore/viralrecon:dev + docker push nfcore/viralrecon:dev + + - name: Push Docker image to DockerHub (release) + if: ${{ github.event_name == 'release' }} + run: | + echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin + docker push nfcore/viralrecon:latest + docker tag nfcore/viralrecon:latest nfcore/viralrecon:${{ github.ref }} + docker push nfcore/viralrecon:${{ github.ref }} diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 1e0827a8..eb66c144 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -33,18 +33,29 @@ jobs: nf-core: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + + - name: Check out pipeline code + uses: actions/checkout@v2 + - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ + - uses: actions/setup-python@v1 with: python-version: '3.6' architecture: 'x64' + - name: Install dependencies run: | python -m pip install --upgrade pip pip install nf-core + - name: Run nf-core lint + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} run: nf-core lint ${GITHUB_WORKSPACE} + From 20cfea073526cd8a96c103ab714d0d81637ac45d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 15:50:16 +0100 Subject: [PATCH 012/129] Update contributors --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 378d08ff..781a2d40 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,6 @@ The pipeline supports short-read Illumina sequencing data from both shotgun (e.g The pipeline is built using 
[Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. -## Pipeline reporting - -Numerous QC and reporting steps are included in the pipeline in order to collate a full summary of the analysis within a single [MultiQC](https://multiqc.info/) report. You can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. - ## Pipeline summary 1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); *if required*) @@ -52,6 +48,10 @@ Note that the pipeline has a number of options to allow you to run only specific For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. See the [usage docs](docs/usage.md) for all of the available options when running the pipeline. +## Pipeline reporting + +Numerous QC and reporting steps are included in the pipeline in order to collate a full summary of the analysis within a single [MultiQC](https://multiqc.info/) report. 
You can see [an example MultiQC report here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html), generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config). The pipeline was run with [these samples](https://zenodo.org/record/3735111), prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. + ## Quick Start 1. Install [`nextflow`](https://nf-co.re/usage/installation) @@ -95,6 +95,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | Name | Affiliation | |-----------------------------------------------------------|---------------------------------------------------------------------------------------| +| [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | | [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | @@ -108,6 +109,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Maxime Garcia](https://github.com/MaxUlysse) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | | [Michael Heuer](https://github.com/heuermh) | [UC Berkeley, USA](https://https://rise.cs.berkeley.edu) | | [Phil Ewels](https://github.com/ewels) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | +| [Richard Mitter](https://github.com/rjmitter) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Simon Heumos](https://github.com/subwaystation) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | | [Stephen 
Kelly](https://github.com/stevekm) | [Memorial Sloan Kettering Cancer Center, USA](https://www.mskcc.org/) | | [Thanh Le Viet](https://github.com/thanhleviet) | [Quadram Institute, UK](https://quadram.ac.uk/) | From ad0fc5131822720200bfeb6eb3ab0b34b6770f09 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 15:50:24 +0100 Subject: [PATCH 013/129] Update packages --- environment.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 22de35b6..b27512b4 100644 --- a/environment.yml +++ b/environment.yml @@ -13,8 +13,12 @@ dependencies: - conda-forge::pymdown-extensions=7.1 - conda-forge::pygments=2.6.1 - conda-forge::pigz=2.3.4 - - conda-forge::r-base=3.6.2 - conda-forge::bc=1.07.1 + - conda-forge::r-base=3.6.2 + - conda-forge::r-tidyr=1.1.0 + - conda-forge::r-optparse=1.6.6 + - conda-forge::r-ggplot2=3.3.1 + - conda-forge::r-reshape2=1.4.4 ## bioconda packages ## common @@ -29,6 +33,7 @@ dependencies: ## variants - bioconda::bowtie2=2.3.5.1 - bioconda::picard=2.22.8 + - bioconda::mosdepth=0.2.9 - bioconda::ivar=1.2.2 - bioconda::bcftools=1.9 - bioconda::varscan=2.4.4 From 582d46abcd14ae03f9ebc240da2e6325089e55b1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 16:14:17 +0100 Subject: [PATCH 014/129] Revert to get linting passing again --- .github/workflows/branch.yml | 50 ++++++++++++------------------------ 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 3b09502e..29f6994f 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -1,34 +1,16 @@ -name: nf-core branch protection -# This workflow is triggered on PRs to master branch on the repository -# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` -on: - pull_request: - branches: [master] - -jobs: - test: - runs-on: ubuntu-latest - steps: - # PRs to the nf-core repo master branch are 
only ok if coming from the nf-core repo `dev` or any `patch` branches - - name: Check PRs - if: github.repository == 'nf-core/viralrecon' - run: | - { [[ ${{github.event.pull_request.head.repo.full_name}} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - Hi @${{ github.event.pull_request.user.login }}, - - It looks like this pull-request is has been made against the ${{github.event.pull_request.head.repo.full_name}} `master` branch. - The `master` branch on nf-core repositories should always contain code from the latest release. - Beacuse of this, PRs to `master` are only allowed if they come from the ${{github.event.pull_request.head.repo.full_name}} `dev` branch. - - You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false +name: nf-core branch protection +# This workflow is triggered on PRs to master branch on the repository +# It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` +on: + pull_request: + branches: + - master + +jobs: + test: + runs-on: ubuntu-18.04 + steps: + # PRs are only ok if coming from an nf-core `dev` branch or a fork `patch` branch + - name: Check PRs + run: | + { [[ $(git remote get-url origin) == *nf-core/viralrecon ]] && [[ ${GITHUB_HEAD_REF} = "dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] From 066ead69f08ae83eadaefb9cb66dbc2921030040 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 16:21:12 +0100 Subject: [PATCH 015/129] Shorten job names --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8639a4dc..bec41d00 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ on: jobs: test: - name: Run default workflow tests + name: Test default workflow # Only run on push if this is the nf-core dev branch (merged PRs) if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest @@ -53,7 +53,7 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test,docker parameters: - name: Run workflow tests for multiple parameters + name: Test workflow parameters if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: @@ -93,7 +93,7 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} test_sra: - name: Run workflow tests on SRA downloaded data + name: Test SRA workflow if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: @@ -133,7 
+133,7 @@ jobs: nextflow run ${GITHUB_WORKSPACE} -profile test_sra,docker ${{ matrix.parameters }} test_sispa: - name: Run workflow tests for SISPA data + name: Test SISPA workflow if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest env: From 88be4c6b10c1b05292f1a5423e8f4f1e6de760e8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 17:56:47 +0100 Subject: [PATCH 016/129] Update CHANGELOG --- CHANGELOG.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb470df9..ef10e8cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,26 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unpublished Version / DEV] +### `Added` + +* [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub + +### `Dependencies` + +* Add mosdepth `0.2.6` +* Add bioconductor-complexheatmap `2.2.0` +* Add r-tidyr `1.1.0` +* Add r-tidyverse `1.3.0` +* Add r-ggplot2 `3.3.1` +* Add r-optparse `1.6.6` +* Add r-reshape2 `1.4.4` +* Add r-viridis `0.5.1` +* Update picard `2.22.8` -> `2.23.0` +* Update minia `3.2.3` -> `3.2.4` +* Update sra-tools `2.10.3` -> `2.10.7` +* Update bowtie2 `2.3.5.1` -> `2.4.1` +* Update plasmidid `1.5.2` -> `1.6.2` + ## [1.0.0] - 2020-06-01 Initial release of nf-core/viralrecon, created with the [nf-core](http://nf-co.re/) template. 
From 951cb0d7301cd44bf05623095b79b1d35b21c222 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 17:56:56 +0100 Subject: [PATCH 017/129] Update environment --- environment.yml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/environment.yml b/environment.yml index b27512b4..74b1dc0e 100644 --- a/environment.yml +++ b/environment.yml @@ -16,41 +16,45 @@ dependencies: - conda-forge::bc=1.07.1 - conda-forge::r-base=3.6.2 - conda-forge::r-tidyr=1.1.0 + - conda-forge::r-tidyverse=1.3.0 - conda-forge::r-optparse=1.6.6 - conda-forge::r-ggplot2=3.3.1 - conda-forge::r-reshape2=1.4.4 + - conda-forge::r-viridis=0.5.1 + - conda-forge::r-tidyverse=1.3.0 ## bioconda packages ## common - bioconda::fastqc=0.11.9 - bioconda::parallel-fastq-dump=0.6.6 - - bioconda::sra-tools=2.10.3 + - bioconda::sra-tools=2.10.7 - bioconda::fastp=0.20.1 - bioconda::samtools=1.9 - bioconda::bedtools=2.29.2 - bioconda::multiqc=1.9 ## variants - - bioconda::bowtie2=2.3.5.1 + - bioconda::bowtie2=2.4.1 - bioconda::picard=2.22.8 - - bioconda::mosdepth=0.2.9 + - bioconda::mosdepth=0.2.6 - bioconda::ivar=1.2.2 - bioconda::bcftools=1.9 - bioconda::varscan=2.4.4 - bioconda::snpeff=4.5covid19 - bioconda::snpsift=4.3.1t + - bioconda::bioconductor-complexheatmap=2.2.0 ## assembly - bioconda::cutadapt=2.10 - bioconda::kraken2=2.0.9beta - bioconda::spades=3.14.0 - bioconda::unicycler=0.4.7 - - bioconda::minia=3.2.3 + - bioconda::minia=3.2.4 - bioconda::minimap2=2.17 - bioconda::seqwish=0.4.1 - bioconda::vg=1.24.0 - bioconda::quast=5.0.2 - bioconda::blast=2.9.0 - - bioconda::plasmidid=1.5.2 + - bioconda::plasmidid=1.6.2 - bioconda::bandage=0.8.1 - hcc::abacas=1.3.1 From 9662187ee9b65df7d0636356593d1a38f4899714 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 18:02:32 +0100 Subject: [PATCH 018/129] Update packages again --- CHANGELOG.md | 6 +++--- environment.yml | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md 
b/CHANGELOG.md index ef10e8cf..6b079145 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,16 +13,16 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * Add mosdepth `0.2.6` * Add bioconductor-complexheatmap `2.2.0` +* Add r-optparse `1.6.6` * Add r-tidyr `1.1.0` * Add r-tidyverse `1.3.0` * Add r-ggplot2 `3.3.1` -* Add r-optparse `1.6.6` * Add r-reshape2 `1.4.4` * Add r-viridis `0.5.1` -* Update picard `2.22.8` -> `2.23.0` -* Update minia `3.2.3` -> `3.2.4` * Update sra-tools `2.10.3` -> `2.10.7` * Update bowtie2 `2.3.5.1` -> `2.4.1` +* Update picard `2.22.8` -> `2.23.0` +* Update minia `3.2.3` -> `3.2.4` * Update plasmidid `1.5.2` -> `1.6.2` ## [1.0.0] - 2020-06-01 diff --git a/environment.yml b/environment.yml index 74b1dc0e..452d315f 100644 --- a/environment.yml +++ b/environment.yml @@ -15,13 +15,12 @@ dependencies: - conda-forge::pigz=2.3.4 - conda-forge::bc=1.07.1 - conda-forge::r-base=3.6.2 + - conda-forge::r-optparse=1.6.6 - conda-forge::r-tidyr=1.1.0 - conda-forge::r-tidyverse=1.3.0 - - conda-forge::r-optparse=1.6.6 - conda-forge::r-ggplot2=3.3.1 - conda-forge::r-reshape2=1.4.4 - conda-forge::r-viridis=0.5.1 - - conda-forge::r-tidyverse=1.3.0 ## bioconda packages ## common @@ -35,7 +34,7 @@ dependencies: ## variants - bioconda::bowtie2=2.4.1 - - bioconda::picard=2.22.8 + - bioconda::picard=2.23.0 - bioconda::mosdepth=0.2.6 - bioconda::ivar=1.2.2 - bioconda::bcftools=1.9 From e118bbb0926e4600efefa2c559bd54b5475eb678 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 18:16:26 +0100 Subject: [PATCH 019/129] Add mosdepth --- CITATIONS.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CITATIONS.md b/CITATIONS.md index ef6aa270..485968c6 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -48,6 +48,9 @@ * [Minimap2](https://www.ncbi.nlm.nih.gov/pubmed/29750242/) > Li H. Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics. 2018 Sep 15;34(18):3094-3100. doi: 10.1093/bioinformatics/bty191. 
PubMed PMID: 29750242; PubMed Central PMCID: PMC6137996. +* [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) + > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. + * [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. From 4e4bb152cc9e2fc13d25c07d3107d4edcc776249 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 15 Jun 2020 20:18:34 +0100 Subject: [PATCH 020/129] Initial commit --- bin/plot_mosdepth_regions.r | 76 +++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100755 bin/plot_mosdepth_regions.r diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r new file mode 100755 index 00000000..f2c70a8c --- /dev/null +++ b/bin/plot_mosdepth_regions.r @@ -0,0 +1,76 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(ggplot2) +library(scales) + +################################################ +################################################ +## PARSE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--region_file"), type="character", default=NULL, help="mosdepth regions output file (typically ends with *.regions.bed.gz)", metavar="path"), + make_option(c("-s", "--sample_name"), type="character", default=NULL, help="Sample name for plot title. 
If not provided will be extracted from --region_file", metavar="string"), + make_option(c("-o", "--out_file"), type="character", default=NULL, help="Full path to pdf output file", metavar="path")) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +if (is.null(opt$region_file)){ + print_help(opt_parser) + stop("At least one mosdepth region file must be supplied", call.=FALSE) +} + +SAMPLE_NAME = opt$sample_name +if (is.null(opt$sample_name)){ + SAMPLE_NAME = gsub('.regions.bed.gz','',basename(opt$region_file)) +} + +OUT_FILE = opt$out_file +if (is.null(opt$out_file)){ + OUT_FILE = gsub('.gz','.pdf',opt$region_file) +} +if (file.exists(dirname(OUT_FILE)) == FALSE) { + dir.create(dirname(OUT_FILE),recursive=TRUE) +} + +################################################ +################################################ +## PLOT COVERAGE ## +################################################ +################################################ + +## Read in data +dat <- read.csv(gzfile(opt$region_file,'r'),sep="\t", header=FALSE) +colnames(dat) <- c('chrom', 'start','end', 'coverage') +dat$coverage <- dat$coverage + 1 + +## Coverage plot +plot <- ggplot(dat,aes(x=end,y=coverage)) + + geom_ribbon(aes(ymin = 0, ymax = coverage), data =) + + theme_bw() + + scale_x_continuous(expand = c(0, 0)) + + scale_y_continuous(trans = log10_trans(), + breaks = trans_breaks("log10", function(x) 10^x), + labels = trans_format("log10", math_format(10^.x)), + expand = c(0, 0)) + + ylab(bquote('log'[10]~'(Coverage+1)')) + + xlab("Position (bp)") + + ggtitle(paste(SAMPLE_NAME,"coverage")) + +## Export plot to file +pdf(file=OUT_FILE,height=6,width=12) +print(plot) +dev.off() + +################################################ +################################################ +################################################ +################################################ From acfc523f35cb9174c383b01cede82a14c8818688 Mon Sep 17 00:00:00 2001 From: drpatelh 
Date: Mon, 15 Jun 2020 20:31:23 +0100 Subject: [PATCH 021/129] Update parameter --- bin/plot_mosdepth_regions.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index f2c70a8c..cc2abd4f 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -18,7 +18,7 @@ library(scales) option_list <- list(make_option(c("-i", "--region_file"), type="character", default=NULL, help="mosdepth regions output file (typically ends with *.regions.bed.gz)", metavar="path"), make_option(c("-s", "--sample_name"), type="character", default=NULL, help="Sample name for plot title. If not provided will be extracted from --region_file", metavar="string"), - make_option(c("-o", "--out_file"), type="character", default=NULL, help="Full path to pdf output file", metavar="path")) + make_option(c("-o", "--out_file"), type="character", default=NULL, help="Full path to pdf output file. If not provide will be extracted from --region_file", metavar="path")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) From 2857842cb7d5206a8c81521387ca4eb94910bc28 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 12:13:05 +0100 Subject: [PATCH 022/129] Update CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b079145..42b7667e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` +* [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub +* Parameters: + * `--skip_mosdepth` to skip genome-wide and amplicon coverage plot generation from mosdepth output + +### `Removed` + +* Parameters: + * `--skip_qc` ### `Dependencies` From 150f61dfe98f5f29c3bc6ad435f2f8067807cae5 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 12:13:16 +0100 Subject: [PATCH 023/129] Add --skip_mosdepth --- docs/usage.md | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7089eaeb..2c913ee5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -46,6 +46,8 @@ * [`--save_align_intermeds`](#--save_align_intermeds) * [`--save_mpileup`](#--save_mpileup) * [`--skip_markduplicates`](#--skip_markduplicates) + * [`--skip_picard_metrics`](#--skip_picard_metrics) + * [`--skip_mosdepth`](#--skip_mosdepth) * [`--skip_snpeff`](#--skip_snpeff) * [`--skip_variants_quast`](#--skip_variants_quast) * [`--skip_variants`](#--skip_variants) @@ -60,9 +62,7 @@ * [`--skip_assembly`](#--skip_assembly) * [Skipping QC steps](#skipping-qc-steps) * `--skip_fastqc` - * `--skip_picard_metrics` * `--skip_multiqc` - * `--skip_qc` * [Job resources](#job-resources) * [Automatic resubmission](#automatic-resubmission) * [Custom resource requests](#custom-resource-requests) @@ -394,6 +394,14 @@ Save Pileup files in the results directory. These tend to be quite large so are Skip picard MarkDuplicates step (Default: false). +### `--skip_picard_metrics` + +Skip Picard CollectMultipleMetrics and CollectWgsMetrics (Default: false). + +### `--skip_mosdepth` + +Skip genome-wide and amplicon coverage plot generation from mosdepth output (Default: false). + ### `--skip_snpeff` Skip SnpEff and SnpSift annotation of variants (Default: false). 
@@ -447,9 +455,7 @@ The pipeline contains a large number of quality control steps. Sometimes, it may | Step | Description | |---------------------------|----------------------------------------------------------| | `--skip_fastqc` | Skip FastQC | -| `--skip_picard_metrics` | Skip Picard CollectMultipleMetrics and CollectWgsMetrics | | `--skip_multiqc` | Skip MultiQC | -| `--skip_qc` | Skip all QC steps except for MultiQC | ## Job resources From 35b49697a8992c59ead53eb9946b984b620f446d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 12:13:30 +0100 Subject: [PATCH 024/129] Add mosdepth --- main.nf | 54 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/main.nf b/main.nf index c2d48f58..3b85802d 100644 --- a/main.nf +++ b/main.nf @@ -66,6 +66,8 @@ def helpMessage() { --save_align_intermeds [bool] Save the intermediate BAM files from the alignment steps (Default: false) --save_mpileup [bool] Save MPileup files generated during variant calling (Default: false) --skip_markduplicates [bool] Skip picard MarkDuplicates step (Default: false) + --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics and CollectWgsMetrics (Default: false) + --skip_mosdepth [bool] Skip genome-wide and amplicon coverage plot generation from mosdepth output (Default: false) --skip_snpeff [bool] Skip SnpEff and SnpSift annotation of variants (Default: false) --skip_variants_quast [bool] Skip generation of QUAST aggregated report for consensus sequences (Default: false) --skip_variants [bool] Skip variant calling steps in the pipeline (Default: false) @@ -82,9 +84,7 @@ def helpMessage() { QC --skip_fastqc [bool] Skip FastQC (Default: false) - --skip_picard_metrics Skip Picard CollectMultipleMetrics and CollectWgsMetrics (Default: false) --skip_multiqc [bool] Skip MultiQC (Default: false) - --skip_qc [bool] Skip all QC steps apart from MultiQC (Default: false) Other options: --outdir [file] The output directory 
where the results will be saved @@ -256,8 +256,10 @@ if (!params.skip_variants) { summary['Min Read Depth'] = params.min_coverage summary['Max Allele Freq'] = params.max_allele_freq if (params.save_align_intermeds) summary['Save Align Intermeds'] = 'Yes' - if (params.save_mpileup) summary['Save MPileup'] = 'Yes' + if (params.save_mpileup) summary['Save mpileup'] = 'Yes' if (params.skip_markduplicates) summary['Skip MarkDuplicates'] = 'Yes' + if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' + if (params.skip_mosdepth) summary['Skip mosdepth'] = 'Yes' if (params.skip_snpeff) summary['Skip SnpEff'] = 'Yes' if (params.skip_variants_quast) summary['Skip Variants QUAST'] = 'Yes' } else { @@ -274,12 +276,7 @@ if (!params.skip_assembly) { } else { summary['Skip Assembly'] = 'Yes' } -if (!params.skip_qc) { - if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' - if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' -} else { - summary['Skip QC'] = 'Yes' -} +if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" @@ -679,7 +676,7 @@ process FASTQC { } when: - !params.skip_fastqc && !params.skip_qc + !params.skip_fastqc input: tuple val(sample), val(single_end), path(reads) from ch_cat_fastqc @@ -979,6 +976,7 @@ if (params.protocol != 'amplicon') { if (params.skip_markduplicates) { ch_ivar_trim_bam .into { ch_markdup_bam_metrics + ch_markdup_bam_mosdepth ch_markdup_bam_mpileup ch_markdup_bam_varscan2_consensus ch_markdup_bam_bcftools @@ -1007,6 +1005,7 @@ if (params.skip_markduplicates) { output: tuple val(sample), val(single_end), path("*.sorted.{bam,bam.bai}") into ch_markdup_bam_metrics, + ch_markdup_bam_mosdepth, ch_markdup_bam_mpileup, 
ch_markdup_bam_varscan2_consensus, ch_markdup_bam_bcftools, @@ -1049,7 +1048,7 @@ process PICARD_METRICS { publishDir "${params.outdir}/variants/bam/picard_metrics", mode: params.publish_dir_mode when: - !params.skip_variants && !params.skip_picard_metrics && !params.skip_qc + !params.skip_variants && !params.skip_picard_metrics input: tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_metrics @@ -1086,6 +1085,35 @@ process PICARD_METRICS { """ } +/* + * STEP 5.6: mosdepth genome-wide and amplicon coverage plots + */ +process MOSDEPTH { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/variants/bam/mosdepth", mode: params.publish_dir_mode + + when: + !params.skip_variants && !params.skip_mosdepth + + input: + tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth + + output: + path "*.{pdf,txt,gz,csi}" + + script: + suffix = params.skip_markduplicates ? "" : ".mkD" + prefix = params.protocol == 'amplicon' ? "${sample}.trim${suffix}" : "${sample}${suffix}" + """ + mosdepth --by 200 --fast-mode ${prefix}.genome ${bam[0]} + plot_mosdepth_regions.r \\ + --region_file "${prefix}.genome.regions.bed.gz" \\ + --sample_name $sample \\ + --out_file "${prefix}.genome.coverage.pdf" + """ +} + //////////////////////////////////////////////////// /* -- VARSCAN2 -- */ //////////////////////////////////////////////////// @@ -1175,7 +1203,7 @@ process VARSCAN2 { tabix -p vcf -f ${sample}.vcf.gz bcftools stats ${sample}.vcf.gz > ${sample}.bcftools_stats.txt sed -i.bak '/LC_ALL/d' ${sample}.varscan2.log - + bcftools filter \\ -i 'FORMAT/AD / (FORMAT/AD + FORMAT/RD) >= $params.max_allele_freq' \\ --output-type z \\ @@ -2998,7 +3026,7 @@ process get_software_versions { } output: - path 'software_versions_mqc.yaml' into ch_software_versions_yaml + path "software_versions_mqc.yaml" into ch_software_versions_yaml path "software_versions.csv" script: From 09e450f76c50a6b3de6a1e33a49eda25591804a7 Mon Sep 17 00:00:00 2001 From: 
drpatelh Date: Tue, 16 Jun 2020 12:13:33 +0100 Subject: [PATCH 025/129] Add --skip_mosdepth --- nextflow.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nextflow.config b/nextflow.config index bb498943..f72bb5a1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -49,6 +49,8 @@ params { save_align_intermeds = false save_mpileup = false skip_markduplicates = false + skip_picard_metrics = false + skip_mosdepth = false skip_snpeff = false skip_variants_quast = false skip_variants = false @@ -65,10 +67,8 @@ params { // Options: QC skip_fastqc = false - skip_picard_metrics = false skip_multiqc = false - skip_qc = false - + // Boilerplate options outdir = './results' publish_dir_mode = 'copy' From 991eeab5176c918ed6e55b865f79c322038fcff7 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 12:24:20 +0100 Subject: [PATCH 026/129] Add mosdepth description --- README.md | 5 +++-- main.nf | 26 +++++++++++++------------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 781a2d40..ac945803 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); *amplicon data only*) 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); *removal optional*) 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - 6. 
Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) *||* [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) + 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) + 7. Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) *||* [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) 6. _De novo_ assembly @@ -44,7 +45,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) 7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) -Note that the pipeline has a number of options to allow you to run only specific aspects of the workflow if you so wish. +> **NB:** The pipeline has a number of options to allow you to run only specific aspects of the workflow if you so wish. For example, you can skip all of the assembly steps with the `--skip_assembly` parameter. 
See the [usage docs](docs/usage.md) for all of the available options when running the pipeline. diff --git a/main.nf b/main.nf index 3b85802d..9822715e 100644 --- a/main.nf +++ b/main.nf @@ -1119,7 +1119,7 @@ process MOSDEPTH { //////////////////////////////////////////////////// /* - * STEP 5.6: Create mpileup file for all variant callers + * STEP 5.7: Create mpileup file for all variant callers */ process SAMTOOLS_MPILEUP { tag "$sample" @@ -1157,7 +1157,7 @@ process SAMTOOLS_MPILEUP { } /* - * STEP 5.6.1: Variant calling with VarScan 2 + * STEP 5.7.1: Variant calling with VarScan 2 */ process VARSCAN2 { tag "$sample" @@ -1215,7 +1215,7 @@ process VARSCAN2 { } /* - * STEP 5.6.1.1: Genome consensus generation with BCFtools and masked with BEDTools + * STEP 5.7.1.1: Genome consensus generation with BCFtools and masked with BEDTools */ process VARSCAN2_CONSENSUS { tag "$sample" @@ -1254,7 +1254,7 @@ process VARSCAN2_CONSENSUS { } /* - * STEP 5.6.1.2: VarScan 2 variant calling annotation with SnpEff and SnpSift + * STEP 5.7.1.2: VarScan 2 variant calling annotation with SnpEff and SnpSift */ process VARSCAN2_SNPEFF { tag "$sample" @@ -1326,7 +1326,7 @@ process VARSCAN2_SNPEFF { } /* - * STEP 5.6.1.3: VarScan 2 consensus sequence report with QUAST + * STEP 5.7.1.3: VarScan 2 consensus sequence report with QUAST */ process VARSCAN2_QUAST { label 'process_medium' @@ -1360,7 +1360,7 @@ process VARSCAN2_QUAST { //////////////////////////////////////////////////// /* - * STEP 5.6.2: Variant calling with iVar + * STEP 5.7.2: Variant calling with iVar */ process IVAR_VARIANTS { tag "$sample" @@ -1411,7 +1411,7 @@ process IVAR_VARIANTS { } /* - * STEP 5.6.2.1: Generate consensus sequence with iVar + * STEP 5.7.2.1: Generate consensus sequence with iVar */ process IVAR_CONSENSUS { tag "$sample" @@ -1439,7 +1439,7 @@ process IVAR_CONSENSUS { } /* - * STEP 5.6.2.2: iVar variant calling annotation with SnpEff and SnpSift + * STEP 5.7.2.2: iVar variant calling annotation with 
SnpEff and SnpSift */ process IVAR_SNPEFF { tag "$sample" @@ -1511,7 +1511,7 @@ process IVAR_SNPEFF { } /* - * STEP 5.6.2.3: iVar consensus sequence report with QUAST + * STEP 5.7.2.3: iVar consensus sequence report with QUAST */ process IVAR_QUAST { label 'process_medium' @@ -1545,7 +1545,7 @@ process IVAR_QUAST { //////////////////////////////////////////////////// /* - * STEP 5.6.3: Variant calling with BCFTools + * STEP 5.7.3: Variant calling with BCFTools */ process BCFTOOLS_VARIANTS { tag "$sample" @@ -1588,7 +1588,7 @@ process BCFTOOLS_VARIANTS { } /* - * STEP 5.6.3.1: Genome consensus generation with BCFtools and masked with BEDTools + * STEP 5.7.3.1: Genome consensus generation with BCFtools and masked with BEDTools */ process BCFTOOLS_CONSENSUS { tag "$sample" @@ -1627,7 +1627,7 @@ process BCFTOOLS_CONSENSUS { } /* - * STEP 5.6.3.2: BCFTools variant calling annotation with SnpEff and SnpSift + * STEP 5.7.3.2: BCFTools variant calling annotation with SnpEff and SnpSift */ process BCFTOOLS_SNPEFF { tag "$sample" @@ -1674,7 +1674,7 @@ process BCFTOOLS_SNPEFF { } /* - * STEP 5.6.3.3: BCFTools consensus sequence report with QUAST + * STEP 5.7.3.3: BCFTools consensus sequence report with QUAST */ process BCFTOOLS_QUAST { label 'process_medium' From 3104925086a98f8a20da652c11066293420eacd4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 17:26:38 +0100 Subject: [PATCH 027/129] Update for multiple samples --- bin/plot_mosdepth_regions.r | 162 ++++++++++++++++++++++++++++-------- 1 file changed, 125 insertions(+), 37 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index cc2abd4f..813bed63 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -9,66 +9,154 @@ library(optparse) library(ggplot2) library(scales) +library(ComplexHeatmap) +library(viridis) +library(tidyverse) ################################################ ################################################ -## PARSE COMMAND-LINE 
PARAMETERS ## +## VALIDATE COMMAND-LINE PARAMETERS ## ################################################ ################################################ -option_list <- list(make_option(c("-i", "--region_file"), type="character", default=NULL, help="mosdepth regions output file (typically ends with *.regions.bed.gz)", metavar="path"), - make_option(c("-s", "--sample_name"), type="character", default=NULL, help="Sample name for plot title. If not provided will be extracted from --region_file", metavar="string"), - make_option(c("-o", "--out_file"), type="character", default=NULL, help="Full path to pdf output file. If not provide will be extracted from --region_file", metavar="path")) +option_list <- list(make_option(c("-i", "--input_files"), type="character", default=NULL, help="Comma-separated list of mosdepth regions output file (typically end in *.regions.bed.gz)", metavar="input_files"), + make_option(c("-s", "--input_suffix"), type="character", default='.regions.bed.gz', help="Portion of filename after sample name to trim for plot title e.g. 
'.regions.bed.gz' if 'SAMPLE1.regions.bed.gz'", metavar="input_suffix"), + make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path"), + make_option(c("-p", "--output_suffix"), type="character", default='regions', help="Output suffix", metavar="output_suffix")) opt_parser <- OptionParser(option_list=option_list) opt <- parse_args(opt_parser) -if (is.null(opt$region_file)){ +## Check input files +INPUT_FILES <- unique(unlist(strsplit(opt$input_files,","))) +if (length(INPUT_FILES) == 0) { print_help(opt_parser) - stop("At least one mosdepth region file must be supplied", call.=FALSE) + stop("At least one input file must be supplied", call.=FALSE) } - -SAMPLE_NAME = opt$sample_name -if (is.null(opt$sample_name)){ - SAMPLE_NAME = gsub('.regions.bed.gz','',basename(opt$region_file)) +if (!all(file.exists(INPUT_FILES))) { + stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE) } -OUT_FILE = opt$out_file -if (is.null(opt$out_file)){ - OUT_FILE = gsub('.gz','.pdf',opt$region_file) +## Check the output directory has a trailing slash, if not add one +OUTDIR <- opt$output_dir +if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") { + OUTDIR <- paste(OUTDIR,"/",sep='') } -if (file.exists(dirname(OUT_FILE)) == FALSE) { - dir.create(dirname(OUT_FILE),recursive=TRUE) +## Create the directory if it doesn't already exist. 
+if (!file.exists(OUTDIR)) { + dir.create(OUTDIR,recursive=TRUE) } +#bin/plot_mosdepth_regions.r -i /camp/stp/babs/working/patelh/code/nextflow/nfcore/viralrecon/results/variants/bam/mosdepth/amplicon/SAMPLE1_PE.trim.mkD.amplicon.regions.bed.gz,/camp/stp/babs/working/patelh/code/nextflow/nfcore/viralrecon/results/variants/bam/mosdepth/amplicon/SAMPLE3_SE.trim.mkD.amplicon.regions.bed.gz -s .trim.mkD.amplicon.regions.bed.gz -o ./test + ################################################ ################################################ -## PLOT COVERAGE ## +## READ IN DATA ## ################################################ ################################################ ## Read in data -dat <- read.csv(gzfile(opt$region_file,'r'),sep="\t", header=FALSE) -colnames(dat) <- c('chrom', 'start','end', 'coverage') -dat$coverage <- dat$coverage + 1 - -## Coverage plot -plot <- ggplot(dat,aes(x=end,y=coverage)) + - geom_ribbon(aes(ymin = 0, ymax = coverage), data =) + - theme_bw() + - scale_x_continuous(expand = c(0, 0)) + - scale_y_continuous(trans = log10_trans(), - breaks = trans_breaks("log10", function(x) 10^x), - labels = trans_format("log10", math_format(10^.x)), - expand = c(0, 0)) + - ylab(bquote('log'[10]~'(Coverage+1)')) + - xlab("Position (bp)") + - ggtitle(paste(SAMPLE_NAME,"coverage")) - -## Export plot to file -pdf(file=OUT_FILE,height=6,width=12) -print(plot) -dev.off() +dat <- NULL +for (input_file in INPUT_FILES) { + sample = gsub(opt$input_suffix,'',basename(input_file)) + dat <- rbind(dat, cbind(read.delim(input_file, header=FALSE, sep='\t', stringsAsFactors=FALSE, check.names=FALSE)[,-6], sample, stringsAsFactors=F)) +} + +## Reformat table +if (ncol(dat) == 6) { + colnames(dat) <- c('chrom', 'start','end', 'region', 'coverage', 'sample') + dat$region <- factor(dat$region, levels=unique(dat$region[order(dat$start, decreasing=TRUE)])) +} else { + colnames(dat) <- c('chrom', 'start','end', 'coverage', 'sample') +} +dat$sample <- factor(dat$sample, 
levels=sort(unique(dat$sample))) + +## Write merged coverage data for all samples to file +outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".coverage.tsv", sep='') +write.table(dat, file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) + +################################################ +################################################ +## PER-SAMPLE COVERAGE PLOTS ## +################################################ +################################################ + +for (sample in unique(dat$sample)) { + sample_dat <- dat[dat$sample == sample,] + outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.tsv", sep='') + write.table(sample_dat,file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) + sample_dat$coverage <- sample_dat$coverage + 1 + + if (ncol(sample_dat) == 6) { + plot <- ggplot(sample_dat,aes(x=region,y=coverage)) + + geom_bar(stat="identity", fill="#D55E00", width=0.6) + + theme_bw() + + theme(plot.title=element_text(size=10), + axis.text.x=element_text(size=10), + axis.text.y=element_text(size=8)) + + coord_flip() + + scale_x_discrete(expand=c(0, 0)) + + scale_y_continuous(trans=log10_trans(), + breaks=trans_breaks('log10', function(x) 10^x), + labels=trans_format('log10', math_format(10^.x)), + expand=c(0, 0)) + + ylab(bquote('log'[10]~'(Coverage+1)')) + + xlab('Amplicon') + + ggtitle(paste(sample,'per amplicon coverage')) + + outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') + ggsave(file=outfile, plot, width=16, height=3+(0.3*length(unique(sample_dat$region))), units="cm") + } else { + plot <- ggplot(sample_dat,aes(x=end,y=coverage)) + + geom_ribbon(aes(ymin=0, ymax=coverage), fill="#D55E00", data=) + + theme_bw() + + scale_x_continuous(expand=c(0, 0)) + + scale_y_continuous(trans=log10_trans(), + breaks=trans_breaks('log10', function(x) 10^x), + labels=trans_format('log10', math_format(10^.x)), + expand=c(0, 0)) + + ylab(bquote('log'[10]~'(Coverage+1)')) + + 
+ xlab('Position (bp)') + + ggtitle(paste(sample,'coverage')) + + outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') + ggsave(file=outfile, plot, width=12, height=6, units="cm") + } +} + +################################################ +################################################ +## REGION-BASED HEATMAP ACROSS ALL SAMPLES ## +################################################ +################################################ + +if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { + mat <- spread(dat[,c("sample", "region", "coverage")], sample, coverage, fill=NA, convert=FALSE) + rownames(mat) <- mat[,1] + mat <- as.matrix(log10(mat[,-1] + 1)) + heatmap <- Heatmap(mat, + name = "log10(Coverage+1)", + cluster_rows = FALSE, + cluster_columns = FALSE, + show_row_names = TRUE, + show_column_names = TRUE, + column_names_side = "bottom", + rect_gp = gpar(col="white", lwd=1), + show_heatmap_legend = TRUE, + row_names_gp = gpar(fontsize=6), + column_names_gp = gpar(fontsize=6), + height = unit(5, "mm")*nrow(mat), + width = unit(5, "mm")*ncol(mat), + col = viridis(50)) + + ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions + width = 0.1969*ncol(mat) + (2*1.3150) + height = 0.1969*nrow(mat) + 1.3150 + outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".heatmap.pdf", sep='') + pdf(file=outfile, width=width, height=height) + draw(heatmap) + dev.off() +} ################################################ ################################################ From cefc5afd68f7768602e62f7936cda0f57da29da2 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 16 Jun 2020 20:16:06 +0200 Subject: [PATCH 028/129] update github actions workflow --- .github/workflows/awsfulltest.yml | 35 +++++++++++++++++++++++++++++++ .github/workflows/awstest.yml | 26 ++++++++++++++--------- 2 files changed, 51 insertions(+), 
10 deletions(-) create mode 100644 .github/workflows/awsfulltest.yml diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml new file mode 100644 index 00000000..8c385069 --- /dev/null +++ b/.github/workflows/awsfulltest.yml @@ -0,0 +1,35 @@ +name: nf-core AWS full test +# This workflow is triggered on releases. +# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + +jobs: + run-awstest: + name: Run AWS test + runs-on: ubuntu-latest + steps: + - name: Setup Miniconda + uses: goanpeca/setup-miniconda@v1.0.2 + with: + auto-update-conda: true + python-version: 3.7 + - name: Install awscli + run: conda install -c conda-forge awscli + - name: Start AWS batch job + env: + AWS_ACCESS_KEY_ID: ${{secrets.AWSTEST_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWSTEST_KEY_SECRET}} + TOWER_ACCESS_TOKEN: ${{secrets.AWSTEST_TOWER_TOKEN}} + #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} + AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} + AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} + run: | # Submits job to AWS batch using a 'nextflow-4GiB' job definition. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. 
+ aws batch submit-job \ + --region eu-west-1 \ + --job-name nf-core-viralrecon \ + --job-queue $AWS_JOB_QUEUE \ + --job-definition nextflow-4GiB \ + --container-overrides '{"command": ["nf-core/viralrecon", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://'"${AWS_S3_BUCKET}"'/viralrecon/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/viralrecon/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' \ No newline at end of file diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 8da4fb05..eec1b547 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -1,14 +1,12 @@ name: nf-core AWS test -# This workflow is triggered on PRs to the master branch. -# It runs the -profile 'test_full' on AWS batch +# This workflow is triggered on push to the master branch. +# It runs the -profile 'test' on AWS batch on: push: branches: - master - - dev - release: - types: [published] + - dev # only for testing purposes, to be removed jobs: run-awstest: @@ -24,8 +22,16 @@ jobs: run: conda install -c conda-forge awscli - name: Start AWS batch job env: - AWS_ACCESS_KEY_ID: ${{secrets.AWS_KEY_ID}} - AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_KEY_SECRET}} - TOWER_ACCESS_TOKEN: ${{secrets.TOWER_ACCESS_TOKEN}} - run: | # Submits job to AWS batch using a 'nextflow-big' instance. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. 
- aws batch submit-job --region eu-west-1 --job-name nf-core-viralrecon --job-queue 'default-8b3836e0-5eda-11ea-96e5-0a2c3f6a2a32' --job-definition nextflow-4GiB --container-overrides '{"command": ["nf-core/viralrecon", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://nf-core-awsmegatests/viralrecon/results-'"${GITHUB_SHA}"' -w s3://nf-core-awsmegatests/viralrecon/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' \ No newline at end of file + AWS_ACCESS_KEY_ID: ${{secrets.AWSTEST_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWSTEST_KEY_SECRET}} + TOWER_ACCESS_TOKEN: ${{secrets.AWSTEST_TOWER_TOKEN}} + #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} + AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} + AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} + run: | # Submits job to AWS batch using a 'nextflow-4GiB' job definition. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. 
+ aws batch submit-job \ + --region eu-west-1 \ + --job-name nf-core-viralrecon \ + --job-queue $AWS_JOB_QUEUE \ + --job-definition nextflow-4GiB \ + --container-overrides '{"command": ["nf-core/viralrecon", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/viralrecon/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/viralrecon/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' \ No newline at end of file From 25503fa915a368791c744f8dc94f51061c8d9bf8 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 16 Jun 2020 20:21:17 +0200 Subject: [PATCH 029/129] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b079145..5be60185 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Added` * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub +* [#118] Updated GitHub Actions AWS workflow for small and full size tests. 
### `Dependencies` From 93f0f07c3b5513005c7e80ee784cfd5eb4a8c39b Mon Sep 17 00:00:00 2001 From: ggabernet Date: Tue, 16 Jun 2020 20:24:37 +0200 Subject: [PATCH 030/129] update awsfulltest --- .github/workflows/awsfulltest.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8c385069..2eecea24 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -8,6 +8,7 @@ on: jobs: run-awstest: + if: github.repository == 'nf-core/viralrecon' name: Run AWS test runs-on: ubuntu-latest steps: From 3deff445a72bd956a3bded7e723a652b546f2219 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 21:17:50 +0100 Subject: [PATCH 031/129] Initial commit --- bin/collapse_amplicon_bed.py | 72 ++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100755 bin/collapse_amplicon_bed.py diff --git a/bin/collapse_amplicon_bed.py b/bin/collapse_amplicon_bed.py new file mode 100755 index 00000000..d2ca7636 --- /dev/null +++ b/bin/collapse_amplicon_bed.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python +import os +import sys +import re +import errno +import argparse + + +def parse_args(args=None): + Description = 'Collapse LEFT/RIGHT primers in amplicon BED to single intervals.' 
+ Epilog = """Example usage: python collapse_amplicon_bed.py <FILE_IN> <FILE_OUT>""" + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument('FILE_IN', help="Input BED file.") + parser.add_argument('FILE_OUT', help="Output BED file.") + parser.add_argument('-ls', '--left_primer_suffix', type=str, dest="LEFT_PRIMER_SUFFIX", default='_LEFT', help="Suffix for left primer in name column of BED file (default: '_LEFT').") + parser.add_argument('-rs', '--right_primer_suffix', type=str, dest="RIGHT_PRIMER_SUFFIX", default='_RIGHT', help="Suffix for right primer in name column of BED file (default: '_RIGHT').") + return parser.parse_args(args) + + +def make_dir(path): + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + +## See https://stackoverflow.com/a/480227 +def uniqify(seq): + seen = set() + seen_add = seen.add + return [x for x in seq if not (x in seen or seen_add(x))] + + +def collapse_amplicon_bed(FileIn,FileOut,LeftPrimerSuffix,RightPrimerSuffix): + StartPosList = [] + IntervalDict = {} + fin = open(FileIn,'r') + while True: + line = fin.readline() + if line: + chrom,start,end,name,score,strand = line.strip().split('\t') + amplicon = re.sub(r'(?:{}|{})'.format(LeftPrimerSuffix,RightPrimerSuffix),'',name) + if amplicon not in IntervalDict: + IntervalDict[amplicon] = [] + IntervalDict[amplicon].append((chrom,int(start),int(end),score)) + StartPosList.append((int(start),amplicon)) + else: + fin.close() + break + + fout = open(FileOut,'w') + for amplicon in uniqify([x[1] for x in sorted(StartPosList)]): + posList = [item for elem in IntervalDict[amplicon] for item in elem[1:3]] + chrom = IntervalDict[amplicon][0][0] + start = min(posList) + end = max(posList) + strand = '+' + score = IntervalDict[amplicon][0][3] + fout.write(f'{chrom}\t{start}\t{end}\t{amplicon}\t{score}\t{strand}\n') + fout.close() + + +def main(args=None): + args = parse_args(args) + 
collapse_amplicon_bed(args.FILE_IN,args.FILE_OUT,args.LEFT_PRIMER_SUFFIX,args.RIGHT_PRIMER_SUFFIX) + + +if __name__ == '__main__': + sys.exit(main()) From a5a502e2128e941c1155139a18396bca17fd2943 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 21:18:15 +0100 Subject: [PATCH 032/129] Initial commit --- bin/plot_mosdepth_dist.r | 110 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100755 bin/plot_mosdepth_dist.r diff --git a/bin/plot_mosdepth_dist.r b/bin/plot_mosdepth_dist.r new file mode 100755 index 00000000..a693cea7 --- /dev/null +++ b/bin/plot_mosdepth_dist.r @@ -0,0 +1,110 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(ggplot2) +library(scales) + +################################################ +################################################ +## VALIDATE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--input_files"), type="character", default=NULL, help="Comma-separated list of mosdepth regions output file (typically end in *.mosdepth.global.dist.txt)", metavar="input_files"), + make_option(c("-s", "--input_suffix"), type="character", default='.mosdepth.global.dist.txt', help="Portion of filename after sample name to trim for plot title e.g. 
'.mosdepth.global.dist.txt' if 'SAMPLE1.mosdepth.global.dist.txt'", metavar="input_suffix"), + make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path"), + make_option(c("-p", "--output_suffix"), type="character", default='global.dist', help="Output suffix", metavar="output_suffix")) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +## Check input files +INPUT_FILES <- unique(unlist(strsplit(opt$input_files,","))) +if (length(INPUT_FILES) == 0) { + print_help(opt_parser) + stop("At least one input file must be supplied", call.=FALSE) +} +if (!all(file.exists(INPUT_FILES))) { + stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE) +} + +## Check the output directory has a trailing slash, if not add one +OUTDIR <- opt$output_dir +if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") { + OUTDIR <- paste(OUTDIR,"/",sep='') +} +## Create the directory if it doesn't already exist. 
+if (!file.exists(OUTDIR)) { + dir.create(OUTDIR,recursive=TRUE) +} + +################################################ +################################################ +## READ IN DATA ## +################################################ +################################################ + +## Read in data +dat <- NULL +for (input_file in INPUT_FILES) { + sample = gsub(opt$input_suffix,'',basename(input_file)) + dat <- rbind(dat, cbind(read.delim(input_file, header=FALSE, sep='\t', stringsAsFactors=FALSE, check.names=FALSE)[,-4], sample, stringsAsFactors=F)) +} +colnames(dat) <- c('chrom', 'coverage', 'frequency', 'sample') +dat <- dat[which(dat$chrom == 'total'),][,2:ncol(dat)] + +################################################ +################################################ +## PER-SAMPLE COVERAGE PLOTS ## +################################################ +################################################ + +for (sample in unique(dat$sample)) { + sample_dat <- dat[dat$sample == sample,] + plot <- ggplot(sample_dat,aes(x=coverage,y=frequency)) + + geom_line(stat="identity") + + theme_bw() + + scale_x_continuous(expand=c(0, 0)) + + scale_y_continuous(limits=c(0,1), + breaks=seq(0,1,0.2), + labels=seq(0,1,0.2), + expand=c(0, 0)) + + ylab('Proportion of genome at coverage') + + xlab('Coverage') + + ggtitle(paste(sample,' genome coverage')) + + outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') + ggsave(file=outfile, plot, height=4, width=8, units="in") +} + +################################################ +################################################ +## COVERAGE PLOT ACROSS ALL SAMPLES ## +################################################ +################################################ + +plot <- ggplot(dat,aes(x=coverage,y=frequency,colour=sample)) + + geom_line(stat="identity") + + theme_bw() + + scale_x_continuous(expand=c(0, 0)) + + scale_y_continuous(limits=c(0,1), + breaks=seq(0,1,0.2), + labels=seq(0,1,0.2), + expand=c(0, 
0)) + + ylab('Proportion of genome at coverage') + + xlab('Coverage') + + ggtitle(paste('All samples genome coverage')) + +outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".coverage.pdf", sep='') +ggsave(file=outfile, plot, height=6, width=12, units="in") + +################################################ +################################################ +################################################ +################################################ From 88b4be462bf49023b8e9df102233d1c60631c2ee Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 21:18:58 +0100 Subject: [PATCH 033/129] Update script --- bin/plot_mosdepth_regions.r | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index 813bed63..ca075d39 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -47,8 +47,6 @@ if (!file.exists(OUTDIR)) { dir.create(OUTDIR,recursive=TRUE) } -#bin/plot_mosdepth_regions.r -i /camp/stp/babs/working/patelh/code/nextflow/nfcore/viralrecon/results/variants/bam/mosdepth/amplicon/SAMPLE1_PE.trim.mkD.amplicon.regions.bed.gz,/camp/stp/babs/working/patelh/code/nextflow/nfcore/viralrecon/results/variants/bam/mosdepth/amplicon/SAMPLE3_SE.trim.mkD.amplicon.regions.bed.gz -s .trim.mkD.amplicon.regions.bed.gz -o ./test - ################################################ ################################################ ## READ IN DATA ## From 8d9baebf35ff26b8e7c73f0fd7a3a7c210cd13a2 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 21:19:06 +0100 Subject: [PATCH 034/129] Minor updates --- main.nf | 53 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 12 deletions(-) diff --git a/main.nf b/main.nf index 9822715e..91b8a297 100644 --- a/main.nf +++ b/main.nf @@ -976,7 +976,8 @@ if (params.protocol != 'amplicon') { if (params.skip_markduplicates) { ch_ivar_trim_bam .into { ch_markdup_bam_metrics - ch_markdup_bam_mosdepth + 
ch_markdup_bam_mosdepth_genome + ch_markdup_bam_mosdepth_amplicon ch_markdup_bam_mpileup ch_markdup_bam_varscan2_consensus ch_markdup_bam_bcftools @@ -1005,7 +1006,8 @@ if (params.skip_markduplicates) { output: tuple val(sample), val(single_end), path("*.sorted.{bam,bam.bai}") into ch_markdup_bam_metrics, - ch_markdup_bam_mosdepth, + ch_markdup_bam_mosdepth_genome, + ch_markdup_bam_mosdepth_amplicon, ch_markdup_bam_mpileup, ch_markdup_bam_varscan2_consensus, ch_markdup_bam_bcftools, @@ -1086,33 +1088,60 @@ process PICARD_METRICS { } /* - * STEP 5.6: mosdepth genome-wide and amplicon coverage plots + * STEP 5.6.1: mosdepth genome-wide coverage plots */ -process MOSDEPTH { +process MOSDEPTH_GENOME { tag "$sample" label 'process_medium' - publishDir "${params.outdir}/variants/bam/mosdepth", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/bam/mosdepth/genome", mode: params.publish_dir_mode when: !params.skip_variants && !params.skip_mosdepth input: - tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth + tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_genome output: path "*.{pdf,txt,gz,csi}" script: suffix = params.skip_markduplicates ? "" : ".mkD" - prefix = params.protocol == 'amplicon' ? "${sample}.trim${suffix}" : "${sample}${suffix}" + prefix = params.protocol == 'amplicon' ? 
"${sample}.trim${suffix}.genome" : "${sample}${suffix}.genome" + """ + mosdepth --by 200 --fast-mode $prefix ${bam[0]} + plot_mosdepth_regions.r --region_file "${prefix}.regions.bed.gz" --sample_name $sample + """ +} + +/* + * STEP 5.6.2: mosdepth amplicon coverage plots + */ +process MOSDEPTH_AMPLICON { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/variants/bam/mosdepth/amplicon", mode: params.publish_dir_mode + + when: + !params.skip_variants && !params.skip_mosdepth && params.protocol == 'amplicon' + + input: + tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_amplicon + path bed from ch_amplicon_bed + + output: + path "*.{pdf,txt,gz,csi}" + + script: + suffix = params.skip_markduplicates ? "" : ".mkD" + prefix = "${sample}.trim${suffix}.amplicon" """ - mosdepth --by 200 --fast-mode ${prefix}.genome ${bam[0]} - plot_mosdepth_regions.r \\ - --region_file "${prefix}.genome.regions.bed.gz" \\ - --sample_name $sample \\ - --out_file "${prefix}.genome.coverage.pdf" + mosdepth --by $bed --fast-mode --thresholds 1,10,50,100,500 ${prefix} ${bam[0]} """ } +// plot_mosdepth_regions.r \\ +// --region_file "${prefix}.regions.bed.gz" \\ +// --sample_name $sample \\ +// --out_file "${prefix}.coverage.pdf" //////////////////////////////////////////////////// /* -- VARSCAN2 -- */ From 74e7e7ef3efd7a070098eb5e71ba7a5c8cee1099 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 22:09:15 +0100 Subject: [PATCH 035/129] Add docs for amplicon_right_suffix --- docs/usage.md | 66 +++++++++++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2c913ee5..f8d4b2e6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -21,12 +21,6 @@ * [`--fasta`](#--fasta) * [`--gff`](#--gff) * [`--save_reference`](#--save_reference) -* [Kraken 2](#kraken-2) - * [`--kraken2_db`](#--kraken2_db) - * [`--kraken2_db_name`](#--kraken2_db_name) - * 
[`--kraken2_use_ftp`](#--kraken2_use_ftp) - * [`--save_kraken2_fastq`](#--save_kraken2_fastq) - * [`--skip_kraken2`](#--skip_kraken2) * [Read trimming](#read-trimming) * [`--cut_mean_quality`](#--cut_mean_quality) * [`--qualified_quality_phred`](#--qualified_quality_phred) @@ -35,6 +29,12 @@ * [`--skip_adapter_trimming`](#--skip_adapter_trimming) * [`--skip_amplicon_trimming`](#--skip_amplicon_trimming) * [`--save_trimmed`](#--save_trimmed) +* [Kraken 2](#kraken-2) + * [`--kraken2_db`](#--kraken2_db) + * [`--kraken2_db_name`](#--kraken2_db_name) + * [`--kraken2_use_ftp`](#--kraken2_use_ftp) + * [`--save_kraken2_fastq`](#--save_kraken2_fastq) + * [`--skip_kraken2`](#--skip_kraken2) * [Variant calling](#variant-calling) * [`--callers`](#-callers) * [`--ivar_exclude_reads`](#--ivar_exclude_reads) @@ -42,6 +42,8 @@ * [`--filter_unmapped`](#--filter_unmapped) * [`--min_base_qual`](#--min_base_qual) * [`--max_allele_freq`](#--max_allele_freq) + * [`--amplicon_left_suffix`](#--amplicon_left_suffix) + * [`--amplicon_right_suffix`](#--amplicon_right_suffix) * [`--min_coverage`](#--min_coverage) * [`--save_align_intermeds`](#--save_align_intermeds) * [`--save_mpileup`](#--save_mpileup) @@ -300,28 +302,6 @@ Full path to viral [GFF](http://www.gmod.org/wiki/GFF3) annotation file (Default If the Bowtie2 index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times (Default: false). -## Kraken 2 - -### `--kraken2_db` - -Full path to Kraken 2 database built from host genome (Default: ''). - -### `--kraken2_db_name` - -Name for host genome as recognised by Kraken 2 when using the `kraken2 build` command (Default: 'human'). - -### `--kraken2_use_ftp` - -Option for Kraken 2 using ftp download instead of rsync (Default: false). - -### `--save_kraken2_fastq` - -Save the host and viral FastQ files in the results directory (Default: false). 
- -### `--skip_kraken2` - -Skip Kraken 2 process for removing host classified reads (Default: false). - ## Read trimming ### `--cut_mean_quality` @@ -352,6 +332,28 @@ Skip the amplicon trimming step performed by Cutadapt. Use this if your input Fa By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete (Default: false). +## Kraken 2 + +### `--kraken2_db` + +Full path to Kraken 2 database built from host genome (Default: ''). + +### `--kraken2_db_name` + +Name for host genome as recognised by Kraken 2 when using the `kraken2 build` command (Default: 'human'). + +### `--kraken2_use_ftp` + +Option for Kraken 2 using ftp download instead of rsync (Default: false). + +### `--save_kraken2_fastq` + +Save the host and viral FastQ files in the results directory (Default: false). + +### `--skip_kraken2` + +Skip Kraken 2 process for removing host classified reads (Default: false). + ## Variant calling ### `--callers` @@ -382,6 +384,14 @@ When performing variant calling skip positions with an overall read depth smalle Maximum allele frequency threshold for filtering variant calls (Default: 0.8). +### `--amplicon_left_suffix` + +Suffix used in name field of `--amplicon_bed` to indicate left primer position (Default: '_LEFT'). + +### `--amplicon_right_suffix` + +Suffix used in name field of `--amplicon_bed` to indicate right primer position (Default: '_RIGHT'). + ### `--save_align_intermeds` By default, intermediate [BAM](https://samtools.github.io/hts-specs/) files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set to true to also save other intermediate BAM files (Default: false). 
From 557a29c0bc89c0432652ef21aa9a0e09c87b3e3e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 22:09:18 +0100 Subject: [PATCH 036/129] Add docs for amplicon_right_suffix --- main.nf | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/main.nf b/main.nf index 91b8a297..4d094bab 100644 --- a/main.nf +++ b/main.nf @@ -22,8 +22,8 @@ def helpMessage() { Mandatory arguments --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) --fasta [file] Path to fasta reference for viral genome. Mandatory when --genome not supplied - --amplicon_bed [file] Path to BED file containing amplicon positions. Mandatory when --protocol 'amplicon' - --amplicon_fasta [file] Path to fasta file containing amplicon sequences. Mandatory when --protocol 'amplicon' + --amplicon_bed [file] Path to BED file containing amplicon positions. Mandatory when calling variants with --protocol amplicon + --amplicon_fasta [file] Path to fasta file containing amplicon sequences. Mandatory when performing assembly with --protocol amplicon -profile [str] Configuration profile to use. 
Can use multiple (comma separated) Available: conda, docker, singularity, test, awsbatch, and more @@ -39,13 +39,6 @@ def helpMessage() { --gff [file] Full path to viral gff annotation file (Default: '') --save_reference [bool] If generated by the pipeline save the Bowtie2 indices in the results directory (Default: false) - Kraken2 - --kraken2_db [file] Full path to Kraken2 database built from host genome (Default: kraken2_human.tar.gz hosted on Zenodo) - --kraken2_db_name [str] Name of host genome for building Kraken2 database (Default: 'human') - --kraken2_use_ftp [bool] Use FTP instead of rsync when building kraken2 databases (Default: false) - --save_kraken2_fastq [bool] Save the host and viral fastq files in the results directory (Default: false) - --skip_kraken2 [bool] Skip Kraken2 process for removing host classified reads (Default: false) - Read trimming --cut_mean_quality [int] The mean quality requirement option shared by fastp cut_front, cut_tail or cut_sliding options. Range: 1~36 (Default: 30 (Q30)) --qualified_quality_phred [int] The quality value that a base is qualified. 
Default 30 means phred quality >=Q30 is qualified (Default: 30) @@ -55,6 +48,13 @@ def helpMessage() { --skip_amplicon_trimming [bool] Skip the amplicon trimming step with Cutadapt (Default: false) --save_trimmed [bool] Save the trimmed FastQ files in the results directory (Default: false) + Kraken2 + --kraken2_db [file] Full path to Kraken2 database built from host genome (Default: kraken2_human.tar.gz hosted on Zenodo) + --kraken2_db_name [str] Name of host genome for building Kraken2 database (Default: 'human') + --kraken2_use_ftp [bool] Use FTP instead of rsync when building kraken2 databases (Default: false) + --save_kraken2_fastq [bool] Save the host and viral fastq files in the results directory (Default: false) + --skip_kraken2 [bool] Skip Kraken2 process for removing host classified reads (Default: false) + Variant calling --callers [str] Specify which variant calling algorithms you would like to use (Default: 'varscan2,ivar,bcftools') --ivar_exclude_reads [bool] Unset -e parameter for iVar trim. 
Reads with primers are included by default (Default: false) @@ -63,6 +63,8 @@ def helpMessage() { --min_base_qual [int] When performing variant calling skip bases with baseQ/BAQ smaller than this number (Default: 20) --min_coverage [int] When performing variant calling skip positions with an overall read depth smaller than this number (Default: 10) --max_allele_freq [float] Maximum allele frequency threshold for filtering variant calls (Default: 0.8) + --amplicon_left_suffix [str] Suffix used in name field of --amplicon_bed to indicate left primer position (Default: '_LEFT') + --amplicon_right_suffix [str] Suffix used in name field of --amplicon_bed to indicate right primer position (Default: '_RIGHT') --save_align_intermeds [bool] Save the intermediate BAM files from the alignment steps (Default: false) --save_mpileup [bool] Save MPileup files generated during variant calling (Default: false) --skip_markduplicates [bool] Skip picard MarkDuplicates step (Default: false) @@ -223,6 +225,8 @@ summary['Samplesheet'] = params.input summary['Protocol'] = params.protocol if (params.protocol == 'amplicon') summary['Amplicon Fasta File'] = params.amplicon_fasta if (params.protocol == 'amplicon') summary['Amplicon BED File'] = params.amplicon_bed +if (params.protocol == 'amplicon') summary['Amplicon Left Suffix'] = params.amplicon_left_suffix +if (params.protocol == 'amplicon') summary['Amplicon Right Suffix'] = params.amplicon_right_suffix summary['Viral Genome'] = params.genome ?: 'Not supplied' summary['Viral Fasta File'] = params.fasta if (params.gff) summary['Viral GFF'] = params.gff @@ -1135,7 +1139,8 @@ process MOSDEPTH_AMPLICON { suffix = params.skip_markduplicates ? 
"" : ".mkD" prefix = "${sample}.trim${suffix}.amplicon" """ - mosdepth --by $bed --fast-mode --thresholds 1,10,50,100,500 ${prefix} ${bam[0]} + collapse_amplicon_bed.py $bed amplicon.collapsed.bed $params.amplicon_left_suffix $params.amplicon_right_suffix + mosdepth --by amplicon.collapsed.bed --fast-mode --thresholds 1,10,50,100,500 ${prefix} ${bam[0]} """ } // plot_mosdepth_regions.r \\ From 7664913986348385747ad9bc797ca0277135011c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 22:09:36 +0100 Subject: [PATCH 037/129] Inialise amplicon_right_suffix --- nextflow.config | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/nextflow.config b/nextflow.config index f72bb5a1..a55ab39c 100644 --- a/nextflow.config +++ b/nextflow.config @@ -22,13 +22,6 @@ params { genome = false save_reference = false - // Options: Kraken2 - kraken2_db = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz' - kraken2_db_name = 'human' - kraken2_use_ftp = false - save_kraken2_fastq = false - skip_kraken2 = false - // Options: Read Trimming cut_mean_quality = 30 qualified_quality_phred = 30 @@ -38,6 +31,13 @@ params { skip_amplicon_trimming = false save_trimmed = false + // Options: Kraken2 + kraken2_db = 'https://zenodo.org/record/3738199/files/kraken2_human.tar.gz' + kraken2_db_name = 'human' + kraken2_use_ftp = false + save_kraken2_fastq = false + skip_kraken2 = false + // Options: Variant calling callers = 'varscan2,ivar,bcftools' ivar_exclude_reads = false @@ -46,6 +46,8 @@ params { min_base_qual = 20 min_coverage = 10 max_allele_freq = 0.8 + amplicon_left_suffix = '_LEFT' + amplicon_right_suffix = '_RIGHT' save_align_intermeds = false save_mpileup = false skip_markduplicates = false @@ -68,7 +70,7 @@ params { // Options: QC skip_fastqc = false skip_multiqc = false - + // Boilerplate options outdir = './results' publish_dir_mode = 'copy' From 4e31914d2a6515c0a5300c0a1a67386d5633632b Mon Sep 17 00:00:00 2001 From: drpatelh Date: 
Tue, 16 Jun 2020 22:15:19 +0100 Subject: [PATCH 038/129] Tweak parameters --- bin/collapse_amplicon_bed.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/collapse_amplicon_bed.py b/bin/collapse_amplicon_bed.py index d2ca7636..0fb2a263 100755 --- a/bin/collapse_amplicon_bed.py +++ b/bin/collapse_amplicon_bed.py @@ -13,8 +13,8 @@ def parse_args(args=None): parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument('FILE_IN', help="Input BED file.") parser.add_argument('FILE_OUT', help="Output BED file.") - parser.add_argument('-ls', '--left_primer_suffix', type=str, dest="LEFT_PRIMER_SUFFIX", default='_LEFT', help="Suffix for left primer in name column of BED file (default: '_LEFT').") - parser.add_argument('-rs', '--right_primer_suffix', type=str, dest="RIGHT_PRIMER_SUFFIX", default='_RIGHT', help="Suffix for right primer in name column of BED file (default: '_RIGHT').") + parser.add_argument('-lp', '--left_primer_suffix', type=str, dest="LEFT_PRIMER_SUFFIX", default='_LEFT', help="Suffix for left primer in name column of BED file (default: '_LEFT').") + parser.add_argument('-rp', '--right_primer_suffix', type=str, dest="RIGHT_PRIMER_SUFFIX", default='_RIGHT', help="Suffix for right primer in name column of BED file (default: '_RIGHT').") return parser.parse_args(args) From e43a2fd2b70b6bf7b61fca04513ae2603da502bc Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 22:15:30 +0100 Subject: [PATCH 039/129] Fix mosdepth scripts --- main.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 4d094bab..a6e18707 100644 --- a/main.nf +++ b/main.nf @@ -1113,7 +1113,6 @@ process MOSDEPTH_GENOME { prefix = params.protocol == 'amplicon' ? 
"${sample}.trim${suffix}.genome" : "${sample}${suffix}.genome" """ mosdepth --by 200 --fast-mode $prefix ${bam[0]} - plot_mosdepth_regions.r --region_file "${prefix}.regions.bed.gz" --sample_name $sample """ } @@ -1139,10 +1138,15 @@ process MOSDEPTH_AMPLICON { suffix = params.skip_markduplicates ? "" : ".mkD" prefix = "${sample}.trim${suffix}.amplicon" """ - collapse_amplicon_bed.py $bed amplicon.collapsed.bed $params.amplicon_left_suffix $params.amplicon_right_suffix + collapse_amplicon_bed.py \\ + --left_primer_suffix $params.amplicon_left_suffix \\ + --right_primer_suffix $params.amplicon_right_suffix \\ + $bed \\ + amplicon.collapsed.bed mosdepth --by amplicon.collapsed.bed --fast-mode --thresholds 1,10,50,100,500 ${prefix} ${bam[0]} """ } +//plot_mosdepth_regions.r --region_file "${prefix}.regions.bed.gz" --sample_name $sample // plot_mosdepth_regions.r \\ // --region_file "${prefix}.regions.bed.gz" \\ // --sample_name $sample \\ From cf3b4d1c86ec87f9f358ce71f0d2435514eaf1f8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 16 Jun 2020 22:37:17 +0100 Subject: [PATCH 040/129] Update regex --- bin/collapse_amplicon_bed.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/collapse_amplicon_bed.py b/bin/collapse_amplicon_bed.py index 0fb2a263..9c2346e8 100755 --- a/bin/collapse_amplicon_bed.py +++ b/bin/collapse_amplicon_bed.py @@ -42,7 +42,7 @@ def collapse_amplicon_bed(FileIn,FileOut,LeftPrimerSuffix,RightPrimerSuffix): line = fin.readline() if line: chrom,start,end,name,score,strand = line.strip().split('\t') - amplicon = re.sub(r'(?:{}|{})'.format(LeftPrimerSuffix,RightPrimerSuffix),'',name) + amplicon = re.sub(r'(?:{}|{}).*'.format(LeftPrimerSuffix,RightPrimerSuffix),'',name) if amplicon not in IntervalDict: IntervalDict[amplicon] = [] IntervalDict[amplicon].append((chrom,int(start),int(end),score)) From 9fb7daa4d4ad16166908713e8edfef1529b001d8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:19:17 +0100 Subject: [PATCH 
041/129] Add into pipeline --- bin/plot_mosdepth_dist.r | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/bin/plot_mosdepth_dist.r b/bin/plot_mosdepth_dist.r index a693cea7..c6f17d61 100755 --- a/bin/plot_mosdepth_dist.r +++ b/bin/plot_mosdepth_dist.r @@ -44,6 +44,8 @@ if (!file.exists(OUTDIR)) { dir.create(OUTDIR,recursive=TRUE) } +OUTSUFFIX <- trimws(opt$output_suffix, "both", whitespace = "\\.") + ################################################ ################################################ ## READ IN DATA ## @@ -79,7 +81,7 @@ for (sample in unique(dat$sample)) { xlab('Coverage') + ggtitle(paste(sample,' genome coverage')) - outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') + outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') ggsave(file=outfile, plot, height=4, width=8, units="in") } @@ -89,20 +91,22 @@ for (sample in unique(dat$sample)) { ################################################ ################################################ -plot <- ggplot(dat,aes(x=coverage,y=frequency,colour=sample)) + - geom_line(stat="identity") + - theme_bw() + - scale_x_continuous(expand=c(0, 0)) + - scale_y_continuous(limits=c(0,1), - breaks=seq(0,1,0.2), - labels=seq(0,1,0.2), - expand=c(0, 0)) + - ylab('Proportion of genome at coverage') + - xlab('Coverage') + - ggtitle(paste('All samples genome coverage')) +if (length(INPUT_FILES) > 1) { + plot <- ggplot(dat,aes(x=coverage,y=frequency,colour=sample)) + + geom_line(stat="identity") + + theme_bw() + + scale_x_continuous(expand=c(0, 0)) + + scale_y_continuous(limits=c(0,1), + breaks=seq(0,1,0.2), + labels=seq(0,1,0.2), + expand=c(0, 0)) + + ylab('Proportion of genome at coverage') + + xlab('Coverage') + + ggtitle(paste('All samples genome coverage')) -outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".coverage.pdf", sep='') -ggsave(file=outfile, plot, height=6, width=12, units="in") + outfile <- 
paste(OUTDIR,"all_samples.",OUTSUFFIX,".coverage.pdf", sep='') + ggsave(file=outfile, plot, height=6, width=12, units="in") +} ################################################ ################################################ From 977c0edc24292054db8a775bfa01501c07933dfa Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:19:33 +0100 Subject: [PATCH 042/129] Bug fixes and polishing --- bin/plot_mosdepth_regions.r | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index ca075d39..4cbec5c3 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -47,6 +47,8 @@ if (!file.exists(OUTDIR)) { dir.create(OUTDIR,recursive=TRUE) } +OUTSUFFIX <- trimws(opt$output_suffix, "both", whitespace = "\\.") + ################################################ ################################################ ## READ IN DATA ## @@ -63,14 +65,14 @@ for (input_file in INPUT_FILES) { ## Reformat table if (ncol(dat) == 6) { colnames(dat) <- c('chrom', 'start','end', 'region', 'coverage', 'sample') - dat$region <- factor(dat$region, levels=unique(dat$region[order(dat$start, decreasing=TRUE)])) + dat$region <- factor(dat$region, levels=unique(dat$region[order(dat$start)])) } else { colnames(dat) <- c('chrom', 'start','end', 'coverage', 'sample') } dat$sample <- factor(dat$sample, levels=sort(unique(dat$sample))) ## Write merged coverage data for all samples to file -outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".coverage.tsv", sep='') +outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".coverage.tsv", sep='') write.table(dat, file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) ################################################ @@ -81,7 +83,7 @@ write.table(dat, file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote= for (sample in unique(dat$sample)) { sample_dat <- dat[dat$sample == sample,] - outfile <- 
paste(OUTDIR,sample,".",opt$output_suffix,".coverage.tsv", sep='') + outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.tsv", sep='') write.table(sample_dat,file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) sample_dat$coverage <- sample_dat$coverage + 1 @@ -102,7 +104,7 @@ for (sample in unique(dat$sample)) { xlab('Amplicon') + ggtitle(paste(sample,'per amplicon coverage')) - outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') + outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') ggsave(file=outfile, plot, width=16, height=3+(0.3*length(unique(sample_dat$region))), units="cm") } else { plot <- ggplot(sample_dat,aes(x=end,y=coverage)) + @@ -115,10 +117,10 @@ for (sample in unique(dat$sample)) { expand=c(0, 0)) + ylab(bquote('log'[10]~'(Coverage+1)')) + xlab('Position (bp)') + - ggtitle(paste(SAMPLE_NAME,'coverage')) + ggtitle(paste(sample,'coverage')) - outfile <- paste(OUTDIR,sample,".",opt$output_suffix,".coverage.pdf", sep='') - ggsave(file=outfile, plot, width=12, height=6, units="cm") + outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') + ggsave(file=outfile, plot, width=12, height=6, units="in") } } @@ -150,7 +152,7 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions width = 0.1969*ncol(mat) + (2*1.3150) height = 0.1969*nrow(mat) + 1.3150 - outfile <- paste(OUTDIR,"all_samples.",opt$output_suffix,".heatmap.pdf", sep='') + outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='') pdf(file=outfile, width=width, height=height) draw(heatmap) dev.off() From 1b0062269b1b80d0baca12d183e1136ed2a12fb1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:19:46 +0100 Subject: [PATCH 043/129] Add plots...yaayyyyy --- main.nf | 73 
++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 11 deletions(-) diff --git a/main.nf b/main.nf index a6e18707..39a3ad9c 100644 --- a/main.nf +++ b/main.nf @@ -1092,12 +1092,17 @@ process PICARD_METRICS { } /* - * STEP 5.6.1: mosdepth genome-wide coverage plots + * STEP 5.6.1: mosdepth genome-wide coverage */ process MOSDEPTH_GENOME { tag "$sample" label 'process_medium' - publishDir "${params.outdir}/variants/bam/mosdepth/genome", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/bam/mosdepth/genome", mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.endsWith(".pdf")) "plots/$filename" + else if (filename.endsWith(".tsv")) "plots/$filename" + else filename + } when: !params.skip_variants && !params.skip_mosdepth @@ -1106,18 +1111,31 @@ process MOSDEPTH_GENOME { tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_genome output: - path "*.{pdf,txt,gz,csi}" + path "*.{txt,gz,csi,tsv,pdf}" script: suffix = params.skip_markduplicates ? "" : ".mkD" prefix = params.protocol == 'amplicon' ? "${sample}.trim${suffix}.genome" : "${sample}${suffix}.genome" + plot_suffix = params.protocol == 'amplicon' ? 
".trim${suffix}.genome" : "${suffix}.genome" """ mosdepth --by 200 --fast-mode $prefix ${bam[0]} + + plot_mosdepth_regions.r \\ + --input_files ${prefix}.regions.bed.gz \\ + --input_suffix ${plot_suffix}.regions.bed.gz \\ + --output_dir ./ \\ + --output_suffix ${plot_suffix}.regions + + plot_mosdepth_dist.r \\ + --input_files ${prefix}.mosdepth.global.dist.txt \\ + --input_suffix ${plot_suffix}.mosdepth.global.dist.txt \\ + --output_dir ./ \\ + --output_suffix ${plot_suffix}.mosdepth.global.dist """ } /* - * STEP 5.6.2: mosdepth amplicon coverage plots + * STEP 5.6.2: mosdepth amplicon coverage */ process MOSDEPTH_AMPLICON { tag "$sample" @@ -1132,7 +1150,9 @@ process MOSDEPTH_AMPLICON { path bed from ch_amplicon_bed output: - path "*.{pdf,txt,gz,csi}" + path "*.global.dist.txt" into ch_mosdepth_amplicon_region_dist + path "*.regions.bed.gz" into ch_mosdepth_amplicon_region_bed + path "*.{summary.txt,region.dist.txt,per-base.bed.gz,thresholds.bed.gz,csi}" script: suffix = params.skip_markduplicates ? "" : ".mkD" @@ -1143,14 +1163,45 @@ process MOSDEPTH_AMPLICON { --right_primer_suffix $params.amplicon_right_suffix \\ $bed \\ amplicon.collapsed.bed - mosdepth --by amplicon.collapsed.bed --fast-mode --thresholds 1,10,50,100,500 ${prefix} ${bam[0]} + mosdepth --by amplicon.collapsed.bed --fast-mode --thresholds 0,1,10,50,100,500 ${prefix} ${bam[0]} + """ +} + +/* + * STEP 5.6.3: mosdepth amplicon coverage plots + */ +process MOSDEPTH_AMPLICON_PLOT { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/variants/bam/mosdepth/amplicon/plots", mode: params.publish_dir_mode + + when: + !params.skip_variants && !params.skip_mosdepth && params.protocol == 'amplicon' + + input: + path dist from ch_mosdepth_amplicon_region_dist.collect() + path bed from ch_mosdepth_amplicon_region_bed.collect() + + output: + path "*.{tsv,pdf}" + + script: + suffix = params.skip_markduplicates ? 
"" : ".mkD" + suffix = ".trim${suffix}.amplicon" + """ + plot_mosdepth_regions.r \\ + --input_files ${bed.join(',')} \\ + --input_suffix ${suffix}.regions.bed.gz \\ + --output_dir ./ \\ + --output_suffix ${suffix}.regions + + plot_mosdepth_dist.r \\ + --input_files ${dist.join(',')} \\ + --input_suffix ${suffix}.mosdepth.global.dist.txt \\ + --output_dir ./ \\ + --output_suffix ${suffix}.mosdepth.global.dist """ } -//plot_mosdepth_regions.r --region_file "${prefix}.regions.bed.gz" --sample_name $sample -// plot_mosdepth_regions.r \\ -// --region_file "${prefix}.regions.bed.gz" \\ -// --sample_name $sample \\ -// --out_file "${prefix}.coverage.pdf" //////////////////////////////////////////////////// /* -- VARSCAN2 -- */ From 39ca540a2593c010e6b86bd8bedc2adc0420caff Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:28:54 +0100 Subject: [PATCH 044/129] Unify plot sizes --- bin/plot_mosdepth_dist.r | 2 +- bin/plot_mosdepth_regions.r | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/plot_mosdepth_dist.r b/bin/plot_mosdepth_dist.r index c6f17d61..90b09a45 100755 --- a/bin/plot_mosdepth_dist.r +++ b/bin/plot_mosdepth_dist.r @@ -82,7 +82,7 @@ for (sample in unique(dat$sample)) { ggtitle(paste(sample,' genome coverage')) outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') - ggsave(file=outfile, plot, height=4, width=8, units="in") + ggsave(file=outfile, plot, height=6, width=12, units="in") } ################################################ diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index 4cbec5c3..fdd41b31 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -105,7 +105,7 @@ for (sample in unique(dat$sample)) { ggtitle(paste(sample,'per amplicon coverage')) outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') - ggsave(file=outfile, plot, width=16, height=3+(0.3*length(unique(sample_dat$region))), units="cm") + ggsave(file=outfile, plot, 
height=3+(0.3*length(unique(sample_dat$region))), width=16, units="cm") } else { plot <- ggplot(sample_dat,aes(x=end,y=coverage)) + geom_ribbon(aes(ymin=0, ymax=coverage), fill="#D55E00", data=) + @@ -120,7 +120,7 @@ for (sample in unique(dat$sample)) { ggtitle(paste(sample,'coverage')) outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') - ggsave(file=outfile, plot, width=12, height=6, units="in") + ggsave(file=outfile, plot, height=6, width=12, units="in") } } @@ -150,10 +150,10 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { col = viridis(50)) ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions - width = 0.1969*ncol(mat) + (2*1.3150) height = 0.1969*nrow(mat) + 1.3150 + width = 0.1969*ncol(mat) + (2*1.3150) outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='') - pdf(file=outfile, width=width, height=height) + pdf(file=outfile, height=height, width=width) draw(heatmap) dev.off() } From e48c26d8987e654d2b7818f6a0a743bf49d4abdb Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:35:37 +0100 Subject: [PATCH 045/129] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42b7667e..e4482495 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: * `--skip_mosdepth` to skip genome-wide and amplicon coverage plot generation from mosdepth output + * `--amplicon_left_suffix` to provide left primer suffix used in name field of `--amplicon_bed` + * `--amplicon_right_suffix` to provide right primer suffix used in name field of `--amplicon_bed` ### `Removed` From 240fde2d236e7fcfb0e472cd92b75e720e4dc44e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 00:35:46 +0100 Subject: [PATCH 046/129] Fix markdown --- docs/usage.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index f8d4b2e6..56c8b10c 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -386,11 +386,11 @@ Maximum allele frequency threshold for filtering variant calls (Default: 0.8). ### `--amplicon_left_suffix` -Suffix used in name field of `--amplicon_bed` to indicate left primer position (Default: '_LEFT'). +Suffix used in name field of `--amplicon_bed` to indicate left primer position (Default: '\_LEFT'). ### `--amplicon_right_suffix` -Suffix used in name field of `--amplicon_bed` to indicate right primer position (Default: '_RIGHT'). +Suffix used in name field of `--amplicon_bed` to indicate right primer position (Default: '\_RIGHT'). ### `--save_align_intermeds` From 97737fb2fde59627419c1e44865e559cac2a8b99 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 08:22:26 +0100 Subject: [PATCH 047/129] Fix channels --- main.nf | 123 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 66 insertions(+), 57 deletions(-) diff --git a/main.nf b/main.nf index 39a3ad9c..7af7458c 100644 --- a/main.nf +++ b/main.nf @@ -1118,7 +1118,11 @@ process MOSDEPTH_GENOME { prefix = params.protocol == 'amplicon' ? "${sample}.trim${suffix}.genome" : "${sample}${suffix}.genome" plot_suffix = params.protocol == 'amplicon' ? 
".trim${suffix}.genome" : "${suffix}.genome" """ - mosdepth --by 200 --fast-mode $prefix ${bam[0]} + mosdepth \\ + --by 200 \\ + --fast-mode \\ + $prefix \\ + ${bam[0]} plot_mosdepth_regions.r \\ --input_files ${prefix}.regions.bed.gz \\ @@ -1135,72 +1139,77 @@ process MOSDEPTH_GENOME { } /* - * STEP 5.6.2: mosdepth amplicon coverage + * STEP 5.6.2: mosdepth amplicon coverage and plots */ -process MOSDEPTH_AMPLICON { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/variants/bam/mosdepth/amplicon", mode: params.publish_dir_mode - - when: - !params.skip_variants && !params.skip_mosdepth && params.protocol == 'amplicon' +if (params.protocol == 'amplicon') { + process MOSDEPTH_AMPLICON { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/variants/bam/mosdepth/amplicon", mode: params.publish_dir_mode - input: - tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_amplicon - path bed from ch_amplicon_bed + when: + !params.skip_variants && !params.skip_mosdepth - output: - path "*.global.dist.txt" into ch_mosdepth_amplicon_region_dist - path "*.regions.bed.gz" into ch_mosdepth_amplicon_region_bed - path "*.{summary.txt,region.dist.txt,per-base.bed.gz,thresholds.bed.gz,csi}" + input: + tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_amplicon + path bed from ch_amplicon_bed - script: - suffix = params.skip_markduplicates ? 
"" : ".mkD" - prefix = "${sample}.trim${suffix}.amplicon" - """ - collapse_amplicon_bed.py \\ - --left_primer_suffix $params.amplicon_left_suffix \\ - --right_primer_suffix $params.amplicon_right_suffix \\ - $bed \\ - amplicon.collapsed.bed - mosdepth --by amplicon.collapsed.bed --fast-mode --thresholds 0,1,10,50,100,500 ${prefix} ${bam[0]} - """ -} + output: + path "*.global.dist.txt" into ch_mosdepth_amplicon_region_dist + path "*.regions.bed.gz" into ch_mosdepth_amplicon_region_bed + path "*.{summary.txt,region.dist.txt,per-base.bed.gz,thresholds.bed.gz,csi}" -/* - * STEP 5.6.3: mosdepth amplicon coverage plots - */ -process MOSDEPTH_AMPLICON_PLOT { - tag "$sample" - label 'process_medium' - publishDir "${params.outdir}/variants/bam/mosdepth/amplicon/plots", mode: params.publish_dir_mode + script: + suffix = params.skip_markduplicates ? "" : ".mkD" + prefix = "${sample}.trim${suffix}.amplicon" + """ + collapse_amplicon_bed.py \\ + --left_primer_suffix $params.amplicon_left_suffix \\ + --right_primer_suffix $params.amplicon_right_suffix \\ + $bed \\ + amplicon.collapsed.bed + + mosdepth \\ + --by amplicon.collapsed.bed \\ + --fast-mode \\ + --thresholds 0,1,10,50,100,500 \\ + ${prefix} \\ + ${bam[0]} + """ + } - when: - !params.skip_variants && !params.skip_mosdepth && params.protocol == 'amplicon' + process MOSDEPTH_AMPLICON_PLOT { + tag "$sample" + label 'process_medium' + publishDir "${params.outdir}/variants/bam/mosdepth/amplicon/plots", mode: params.publish_dir_mode - input: - path dist from ch_mosdepth_amplicon_region_dist.collect() - path bed from ch_mosdepth_amplicon_region_bed.collect() + when: + !params.skip_variants && !params.skip_mosdepth - output: - path "*.{tsv,pdf}" + input: + path dist from ch_mosdepth_amplicon_region_dist.collect() + path bed from ch_mosdepth_amplicon_region_bed.collect() - script: - suffix = params.skip_markduplicates ? 
"" : ".mkD" - suffix = ".trim${suffix}.amplicon" - """ - plot_mosdepth_regions.r \\ - --input_files ${bed.join(',')} \\ - --input_suffix ${suffix}.regions.bed.gz \\ - --output_dir ./ \\ - --output_suffix ${suffix}.regions + output: + path "*.{tsv,pdf}" - plot_mosdepth_dist.r \\ - --input_files ${dist.join(',')} \\ - --input_suffix ${suffix}.mosdepth.global.dist.txt \\ - --output_dir ./ \\ - --output_suffix ${suffix}.mosdepth.global.dist - """ + script: + suffix = params.skip_markduplicates ? "" : ".mkD" + suffix = ".trim${suffix}.amplicon" + """ + plot_mosdepth_regions.r \\ + --input_files ${bed.join(',')} \\ + --input_suffix ${suffix}.regions.bed.gz \\ + --output_dir ./ \\ + --output_suffix ${suffix}.regions + + plot_mosdepth_dist.r \\ + --input_files ${dist.join(',')} \\ + --input_suffix ${suffix}.mosdepth.global.dist.txt \\ + --output_dir ./ \\ + --output_suffix ${suffix}.mosdepth.global.dist + """ + } } //////////////////////////////////////////////////// From 175d962f9e3940a86631d54118845401daaafc2d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 10:26:00 +0100 Subject: [PATCH 048/129] Update plasmidid version --- CHANGELOG.md | 2 +- environment.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e4482495..dc73f843 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* Update bowtie2 `2.3.5.1` -> `2.4.1` * Update picard `2.22.8` -> `2.23.0` * Update minia `3.2.3` -> `3.2.4` -* Update plasmidid `1.5.2` -> `1.6.2` +* Update plasmidid `1.5.2` -> `1.6.3` ## [1.0.0] - 2020-06-01 diff --git a/environment.yml b/environment.yml index 452d315f..29cc35ca 100644 --- a/environment.yml +++ b/environment.yml @@ -54,6 +54,6 @@ dependencies: - bioconda::vg=1.24.0 - bioconda::quast=5.0.2 - bioconda::blast=2.9.0 - - bioconda::plasmidid=1.6.2 + - bioconda::plasmidid=1.6.3 - bioconda::bandage=0.8.1 - hcc::abacas=1.3.1 From 1414c630a859ec1088d28c016f46ed97938cbccc Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 12:28:54 +0100 Subject: [PATCH 049/129] Remove plotting script --- bin/plot_mosdepth_dist.r | 114 --------------------------------------- 1 file changed, 114 deletions(-) delete mode 100755 bin/plot_mosdepth_dist.r diff --git a/bin/plot_mosdepth_dist.r b/bin/plot_mosdepth_dist.r deleted file mode 100755 index 90b09a45..00000000 --- a/bin/plot_mosdepth_dist.r +++ /dev/null @@ -1,114 +0,0 @@ -#!/usr/bin/env Rscript - -################################################ -################################################ -## LOAD LIBRARIES ## -################################################ -################################################ - -library(optparse) -library(ggplot2) -library(scales) - -################################################ -################################################ -## VALIDATE COMMAND-LINE PARAMETERS ## -################################################ -################################################ - -option_list <- list(make_option(c("-i", "--input_files"), type="character", default=NULL, help="Comma-separated list of mosdepth regions output file (typically end in *.mosdepth.global.dist.txt)", metavar="input_files"), - make_option(c("-s", "--input_suffix"), type="character", default='.mosdepth.global.dist.txt', help="Portion of filename after sample name to trim for plot title e.g. 
'.mosdepth.global.dist.txt' if 'SAMPLE1.mosdepth.global.dist.txt'", metavar="input_suffix"), - make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--output_suffix"), type="character", default='global.dist', help="Output suffix", metavar="output_suffix")) - -opt_parser <- OptionParser(option_list=option_list) -opt <- parse_args(opt_parser) - -## Check input files -INPUT_FILES <- unique(unlist(strsplit(opt$input_files,","))) -if (length(INPUT_FILES) == 0) { - print_help(opt_parser) - stop("At least one input file must be supplied", call.=FALSE) -} -if (!all(file.exists(INPUT_FILES))) { - stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE) -} - -## Check the output directory has a trailing slash, if not add one -OUTDIR <- opt$output_dir -if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") { - OUTDIR <- paste(OUTDIR,"/",sep='') -} -## Create the directory if it doesn't already exist. 
-if (!file.exists(OUTDIR)) { - dir.create(OUTDIR,recursive=TRUE) -} - -OUTSUFFIX <- trimws(opt$output_suffix, "both", whitespace = "\\.") - -################################################ -################################################ -## READ IN DATA ## -################################################ -################################################ - -## Read in data -dat <- NULL -for (input_file in INPUT_FILES) { - sample = gsub(opt$input_suffix,'',basename(input_file)) - dat <- rbind(dat, cbind(read.delim(input_file, header=FALSE, sep='\t', stringsAsFactors=FALSE, check.names=FALSE)[,-4], sample, stringsAsFactors=F)) -} -colnames(dat) <- c('chrom', 'coverage', 'frequency', 'sample') -dat <- dat[which(dat$chrom == 'total'),][,2:ncol(dat)] - -################################################ -################################################ -## PER-SAMPLE COVERAGE PLOTS ## -################################################ -################################################ - -for (sample in unique(dat$sample)) { - sample_dat <- dat[dat$sample == sample,] - plot <- ggplot(sample_dat,aes(x=coverage,y=frequency)) + - geom_line(stat="identity") + - theme_bw() + - scale_x_continuous(expand=c(0, 0)) + - scale_y_continuous(limits=c(0,1), - breaks=seq(0,1,0.2), - labels=seq(0,1,0.2), - expand=c(0, 0)) + - ylab('Proportion of genome at coverage') + - xlab('Coverage') + - ggtitle(paste(sample,' genome coverage')) - - outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') - ggsave(file=outfile, plot, height=6, width=12, units="in") -} - -################################################ -################################################ -## COVERAGE PLOT ACROSS ALL SAMPLES ## -################################################ -################################################ - -if (length(INPUT_FILES) > 1) { - plot <- ggplot(dat,aes(x=coverage,y=frequency,colour=sample)) + - geom_line(stat="identity") + - theme_bw() + - scale_x_continuous(expand=c(0, 0)) + - 
scale_y_continuous(limits=c(0,1), - breaks=seq(0,1,0.2), - labels=seq(0,1,0.2), - expand=c(0, 0)) + - ylab('Proportion of genome at coverage') + - xlab('Coverage') + - ggtitle(paste('All samples genome coverage')) - - outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".coverage.pdf", sep='') - ggsave(file=outfile, plot, height=6, width=12, units="in") -} - -################################################ -################################################ -################################################ -################################################ From da4ef26092616d4a2b2454bac0e839b1678c8a0d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 12:29:06 +0100 Subject: [PATCH 050/129] Add mosdepth to MultiQC --- assets/multiqc_config.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 8613adff..49bc1e70 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -12,6 +12,7 @@ run_modules: - bowtie2 - samtools - picard + - mosdepth - varscan2 - ivar - bcftools @@ -74,6 +75,11 @@ module_order: info: 'This section of the report shows picard CollectMultipleMetrics and MarkDuplicates results after mapping (if "--protocol amplicon" this will be after primer sequence removal with iVar).' path_filters: - './picard/metrics/*' + - mosdepth: + name: 'VARIANTS: mosdepth' + info: 'This section of the report shows genome-wide coverage metrics generated by mosdepth.' + path_filters: + - './mosdepth/genome/*' - varscan2: name: 'VARIANTS: VarScan 2' info: 'This section of the report shows total number of variants called by VarScan 2 broken down by those that were reported or not.' 
From f1d93ad3c643d39cb0c9beeaf6dc4aa8d93d2038 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 12:29:12 +0100 Subject: [PATCH 051/129] Add mosdepth to MultiQC --- main.nf | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/main.nf b/main.nf index 7af7458c..faaaa12b 100644 --- a/main.nf +++ b/main.nf @@ -1111,6 +1111,7 @@ process MOSDEPTH_GENOME { tuple val(sample), val(single_end), path(bam) from ch_markdup_bam_mosdepth_genome output: + path "*.global.dist.txt" into ch_mosdepth_genome_mqc path "*.{txt,gz,csi,tsv,pdf}" script: @@ -1129,12 +1130,6 @@ process MOSDEPTH_GENOME { --input_suffix ${plot_suffix}.regions.bed.gz \\ --output_dir ./ \\ --output_suffix ${plot_suffix}.regions - - plot_mosdepth_dist.r \\ - --input_files ${prefix}.mosdepth.global.dist.txt \\ - --input_suffix ${plot_suffix}.mosdepth.global.dist.txt \\ - --output_dir ./ \\ - --output_suffix ${plot_suffix}.mosdepth.global.dist """ } @@ -1155,9 +1150,8 @@ if (params.protocol == 'amplicon') { path bed from ch_amplicon_bed output: - path "*.global.dist.txt" into ch_mosdepth_amplicon_region_dist path "*.regions.bed.gz" into ch_mosdepth_amplicon_region_bed - path "*.{summary.txt,region.dist.txt,per-base.bed.gz,thresholds.bed.gz,csi}" + path "*.{txt,gz,csi}" script: suffix = params.skip_markduplicates ? 
"" : ".mkD" @@ -1187,7 +1181,6 @@ if (params.protocol == 'amplicon') { !params.skip_variants && !params.skip_mosdepth input: - path dist from ch_mosdepth_amplicon_region_dist.collect() path bed from ch_mosdepth_amplicon_region_bed.collect() output: @@ -1202,12 +1195,6 @@ if (params.protocol == 'amplicon') { --input_suffix ${suffix}.regions.bed.gz \\ --output_dir ./ \\ --output_suffix ${suffix}.regions - - plot_mosdepth_dist.r \\ - --input_files ${dist.join(',')} \\ - --input_suffix ${suffix}.mosdepth.global.dist.txt \\ - --output_dir ./ \\ - --output_suffix ${suffix}.mosdepth.global.dist """ } } @@ -3187,6 +3174,7 @@ process MULTIQC { path ('picard/markdup/*') from ch_markdup_bam_flagstat_mqc.collect().ifEmpty([]) path ('picard/metrics/*') from ch_markdup_bam_metrics_mqc.collect().ifEmpty([]) path ('picard/metrics/*') from ch_picard_metrics_mqc.collect().ifEmpty([]) + path ('mosdepth/genome/*') from ch_mosdepth_genome_mqc.collect().ifEmpty([]) path ('varscan2/counts/lowfreq/*') from ch_varscan2_log_mqc.collect().ifEmpty([]) path ('varscan2/bcftools/highfreq/*') from ch_varscan2_bcftools_highfreq_mqc.collect().ifEmpty([]) path ('varscan2/snpeff/highfreq/*') from ch_varscan2_snpeff_highfreq_mqc.collect().ifEmpty([]) From 26cf182b626af43cacbe87690be64eae77ed87fc Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 13:56:26 +0100 Subject: [PATCH 052/129] Fix bug in sample name for cutadapt and insertsize --- bin/multiqc_to_custom_tsv.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/multiqc_to_custom_tsv.py b/bin/multiqc_to_custom_tsv.py index 8afae320..dcb067ed 100755 --- a/bin/multiqc_to_custom_tsv.py +++ b/bin/multiqc_to_custom_tsv.py @@ -44,9 +44,11 @@ def yaml_fields_to_dict(YAMLFile,AppendDict={},FieldMappingList=[],ValidSampleLi for k in yaml_dict.keys(): key = k if os.path.basename(YAMLFile).startswith('multiqc_picard_insertSize'): - key = k[:-3] + if k[-3:] == '_FR': + key = k[:-3] if 
os.path.basename(YAMLFile).startswith('multiqc_cutadapt'): - names = [x for x in ValidSampleList if key.startswith(x)] + names = [x for x in ValidSampleList if key[:-2] == x] + names += [x for x in ValidSampleList if key == x] if names != []: key = names[0] inclSample = True From d6b68f61aa8592ec541f165e7889570bbc126039 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 16:31:06 +0100 Subject: [PATCH 053/129] Plot tweaks --- bin/plot_mosdepth_regions.r | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index fdd41b31..b384b1a8 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -143,6 +143,7 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { column_names_side = "bottom", rect_gp = gpar(col="white", lwd=1), show_heatmap_legend = TRUE, + heatmap_legend_param = list(title_gp=gpar(fontsize = 8), labels_gp=gpar(fontsize=6), direction="horizontal"), row_names_gp = gpar(fontsize=6), column_names_gp = gpar(fontsize=6), height = unit(5, "mm")*nrow(mat), @@ -150,11 +151,11 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { col = viridis(50)) ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions - height = 0.1969*nrow(mat) + 1.3150 - width = 0.1969*ncol(mat) + (2*1.3150) + height = 0.1969*nrow(mat) + 1.6 + width = 0.1969*ncol(mat) + (2*0.7) outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='') pdf(file=outfile, height=height, width=width) - draw(heatmap) + draw(heatmap, heatmap_legend_side="top") dev.off() } From 056b6e3b99bf4933f0768232038d3ea04a51b098 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 16:49:06 +0100 Subject: [PATCH 054/129] Fix log scale lables --- bin/plot_mosdepth_regions.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index b384b1a8..56cd56fd 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -97,7 +97,7 @@ for (sample in unique(dat$sample)) { coord_flip() + scale_x_discrete(expand=c(0, 0)) + scale_y_continuous(trans=log10_trans(), - breaks=trans_breaks('log10', function(x) 10^x), + breaks=10^c(0:10), labels=trans_format('log10', math_format(10^.x)), expand=c(0, 0)) + ylab(bquote('log'[10]~'(Coverage+1)')) + @@ -112,7 +112,7 @@ for (sample in unique(dat$sample)) { theme_bw() + scale_x_continuous(expand=c(0, 0)) + scale_y_continuous(trans=log10_trans(), - breaks=trans_breaks('log10', function(x) 10^x), + breaks=10^c(0:10), labels=trans_format('log10', math_format(10^.x)), expand=c(0, 0)) + ylab(bquote('log'[10]~'(Coverage+1)')) + From 1ed21234bdbfe54e31f0be455b17f2fe204787a1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Wed, 17 Jun 2020 17:23:54 +0100 Subject: [PATCH 055/129] Fix log axis labels agaaaaiinnn --- bin/plot_mosdepth_regions.r | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index 56cd56fd..d76b64b8 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -100,6 +100,7 @@ for (sample in unique(dat$sample)) { breaks=10^c(0:10), labels=trans_format('log10', math_format(10^.x)), expand=c(0, 0)) + + expand_limits(y=1) + ylab(bquote('log'[10]~'(Coverage+1)')) + xlab('Amplicon') + ggtitle(paste(sample,'per amplicon coverage')) @@ -115,6 +116,7 @@ for (sample in unique(dat$sample)) { breaks=10^c(0:10), labels=trans_format('log10', math_format(10^.x)), expand=c(0, 0)) + + expand_limits(y=1) + ylab(bquote('log'[10]~'(Coverage+1)')) + xlab('Position (bp)') + ggtitle(paste(sample,'coverage')) @@ -151,7 +153,7 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { col = viridis(50)) ## Size of heatmaps scaled based on matrix dimensions: 
https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions - height = 0.1969*nrow(mat) + 1.6 + height = 0.1969*nrow(mat) + 2 width = 0.1969*ncol(mat) + (2*0.7) outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='') pdf(file=outfile, height=height, width=width) From 25acd8275b9673f034d6c8eb6d6035b3b44bda62 Mon Sep 17 00:00:00 2001 From: ggabernet Date: Wed, 17 Jun 2020 23:02:42 +0200 Subject: [PATCH 056/129] aws test update secrets names --- .github/workflows/awsfulltest.yml | 6 +++--- .github/workflows/awstest.yml | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 2eecea24..1cc3a9d5 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -21,9 +21,9 @@ jobs: run: conda install -c conda-forge awscli - name: Start AWS batch job env: - AWS_ACCESS_KEY_ID: ${{secrets.AWSTEST_KEY_ID}} - AWS_SECRET_ACCESS_KEY: ${{secrets.AWSTEST_KEY_SECRET}} - TOWER_ACCESS_TOKEN: ${{secrets.AWSTEST_TOWER_TOKEN}} + AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} + TOWER_ACCESS_TOKEN: ${{secrets.AWS_TOWER_TOKEN}} #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 92f99f30..656a4e92 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,9 +23,9 @@ jobs: run: conda install -c conda-forge awscli - name: Start AWS batch job env: - AWS_ACCESS_KEY_ID: ${{secrets.AWSTEST_KEY_ID}} - AWS_SECRET_ACCESS_KEY: ${{secrets.AWSTEST_KEY_SECRET}} - TOWER_ACCESS_TOKEN: ${{secrets.AWSTEST_TOWER_TOKEN}} + AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} + AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} + 
TOWER_ACCESS_TOKEN: ${{secrets.AWS_TOWER_TOKEN}} #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} From 61484948388f6d51131fda5d83f07317e1d7108f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 12:08:56 +0100 Subject: [PATCH 057/129] Add mosdepth to output docs --- docs/output.md | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/output.md b/docs/output.md index 6401dd20..faf7693a 100644 --- a/docs/output.md +++ b/docs/output.md @@ -19,6 +19,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Whole genome coverage and alignment metrics + * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics * [VarScan 2, BCFTools, BEDTools](#varscan-2-bcftools-bedtools) *||* [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction * [QUAST](#quast) - Consensus assessment report @@ -181,6 +182,42 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- > **NB:** The value of `<SUFFIX>` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `<SUFFIX>` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. 
+### mosdepth + +[mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +**Output files:** + +* `variants/bam/mosdepth/genome/` + * `<SAMPLE>.<SUFFIX>.genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `<SAMPLE>.<SUFFIX>.genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `<SAMPLE>.<SUFFIX>.genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `<SAMPLE>.<SUFFIX>.genome.per-base.bed.gz`: Per-base depth output genome-wide. + * `<SAMPLE>.<SUFFIX>.genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `<SAMPLE>.<SUFFIX>.genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. + * `<SAMPLE>.<SUFFIX>.genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. +* `variants/bam/mosdepth/genome/plots/` + * `all_samples.<SUFFIX>.genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `<SAMPLE>.<SUFFIX>.genome.regions.coverage.pdf`: Whole-genome coverage plot. + * `<SAMPLE>.<SUFFIX>.genome.regions.coverage.tsv`: File containing coverage values for the above plot. +* `variants/bam/mosdepth/amplicon/` + * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. 
+ * `<SAMPLE>.<SUFFIX>.amplicon.per-base.bed.gz`: Per-base depth output genome-wide. + * `<SAMPLE>.<SUFFIX>.amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `<SAMPLE>.<SUFFIX>.amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. + * `<SAMPLE>.<SUFFIX>.amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `<SAMPLE>.<SUFFIX>.amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. + * `<SAMPLE>.<SUFFIX>.amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. +* `variants/bam/mosdepth/amplicon/plots/` + * `all_samples.<SUFFIX>.amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples.<SUFFIX>.amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + +> **NB:** The value of `<SUFFIX>` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `<SUFFIX>` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. + ### VarScan 2, BCFTools, BEDTools [VarScan 2](http://dkoboldt.github.io/varscan/) is a platform-independent software tool to detect variants in NGS data. In this pipeline, VarScan 2 is used in conjunction with SAMtools in order to call both high and low frequency variants. 
From 6c8ffed40c59633acae61330326e6a14a3741e1c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 12:18:51 +0100 Subject: [PATCH 058/129] Add collapsible list --- docs/output.md | 65 ++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/docs/output.md b/docs/output.md index faf7693a..67f53c6e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -186,37 +186,40 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. -**Output files:** - -* `variants/bam/mosdepth/genome/` - * `<SAMPLE>.<SUFFIX>.genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `<SAMPLE>.<SUFFIX>.genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `<SAMPLE>.<SUFFIX>.genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `<SAMPLE>.<SUFFIX>.genome.per-base.bed.gz`: Per-base depth output genome-wide. - * `<SAMPLE>.<SUFFIX>.genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `<SAMPLE>.<SUFFIX>.genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. - * `<SAMPLE>.<SUFFIX>.genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. -* `variants/bam/mosdepth/genome/plots/` - * `all_samples.<SUFFIX>.genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `<SAMPLE>.<SUFFIX>.genome.regions.coverage.pdf`: Whole-genome coverage plot. 
- * `<SAMPLE>.<SUFFIX>.genome.regions.coverage.tsv`: File containing coverage values for the above plot. -* `variants/bam/mosdepth/amplicon/` - * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `<SAMPLE>.<SUFFIX>.amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `<SAMPLE>.<SUFFIX>.amplicon.per-base.bed.gz`: Per-base depth output genome-wide. - * `<SAMPLE>.<SUFFIX>.amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `<SAMPLE>.<SUFFIX>.amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. - * `<SAMPLE>.<SUFFIX>.amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `<SAMPLE>.<SUFFIX>.amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. - * `<SAMPLE>.<SUFFIX>.amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. -* `variants/bam/mosdepth/amplicon/plots/` - * `all_samples.<SUFFIX>.amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples.<SUFFIX>.amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - -> **NB:** The value of `<SUFFIX>` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. 
However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. +
+ **Output files:** + + * `variants/bam/mosdepth/genome/` + * `..genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..genome.per-base.bed.gz`: Per-base depth output genome-wide. + * `..genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. + * `..genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `variants/bam/mosdepth/genome/plots/` + * `all_samples..genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `..genome.regions.coverage.pdf`: Whole-genome coverage plot. + * `..genome.regions.coverage.tsv`: File containing coverage values for the above plot. + * `variants/bam/mosdepth/amplicon/` + * `..amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..amplicon.per-base.bed.gz`: Per-base depth output genome-wide. + * `..amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. + * `..amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. 
+ * `..amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. + * `..amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `variants/bam/mosdepth/amplicon/plots/` + * `all_samples..amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples..amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `..amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `..amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + + > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. + +
### VarScan 2, BCFTools, BEDTools From ef2d30004d2581028679a590ceff12c76bf7f166 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 12:29:30 +0100 Subject: [PATCH 059/129] Adjust spacing --- .github/markdownlint.yml | 2 ++ docs/output.md | 66 ++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index 96b12a70..e84d9242 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -3,3 +3,5 @@ default: true, line-length: false no-duplicate-header: siblings_only: true +MD033: + allowed_elements: [details, summary] diff --git a/docs/output.md b/docs/output.md index 67f53c6e..e2e36f76 100644 --- a/docs/output.md +++ b/docs/output.md @@ -152,7 +152,7 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses picard MarkDuplicates to *mark* the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. However, you can also choose to remove any reads identified as duplicates via the `--filter_dups` parameter. -**Output files:** +**Output files** * `variants/bam/` * `..sorted.bam`: Coordinate sorted BAM file after duplicate marking. @@ -187,38 +187,38 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. 
mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts.
- **Output files:** - - * `variants/bam/mosdepth/genome/` - * `..genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `..genome.per-base.bed.gz`: Per-base depth output genome-wide. - * `..genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `..genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. - * `..genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `variants/bam/mosdepth/genome/plots/` - * `all_samples..genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `..genome.regions.coverage.pdf`: Whole-genome coverage plot. - * `..genome.regions.coverage.tsv`: File containing coverage values for the above plot. - * `variants/bam/mosdepth/amplicon/` - * `..amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `..amplicon.per-base.bed.gz`: Per-base depth output genome-wide. - * `..amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `..amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. - * `..amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. 
- * `..amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. - * `..amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `variants/bam/mosdepth/amplicon/plots/` - * `all_samples..amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples..amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `..amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `..amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. - - > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. - + Output files + +* `variants/bam/mosdepth/genome/` + * `..genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..genome.per-base.bed.gz`: Per-base depth output genome-wide. + * `..genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. 
+ * `..genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. +* `variants/bam/mosdepth/genome/plots/` + * `all_samples..genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `..genome.regions.coverage.pdf`: Whole-genome coverage plot. + * `..genome.regions.coverage.tsv`: File containing coverage values for the above plot. +* `variants/bam/mosdepth/amplicon/` + * `..amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..amplicon.per-base.bed.gz`: Per-base depth output genome-wide. + * `..amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. + * `..amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. + * `..amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. +* `variants/bam/mosdepth/amplicon/plots/` + * `all_samples..amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples..amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `..amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `..amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. 
+ +> **NB:** The value of `<SUFFIX>` in the output file names above will depend on the preceding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `<SUFFIX>` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. +
### VarScan 2, BCFTools, BEDTools From cd53dd8cc13d7847881a39bfa784efc0207b629f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 12:52:05 +0100 Subject: [PATCH 060/129] Add collapsible lists for all output sections --- docs/output.md | 196 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 140 insertions(+), 56 deletions(-) diff --git a/docs/output.md b/docs/output.md index e2e36f76..9876a4b9 100644 --- a/docs/output.md +++ b/docs/output.md @@ -44,7 +44,8 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Please see the [usage docs](https://github.com/nf-core/viralrecon/blob/master/docs/usage.md#supported-public-repository-ids) for a list of supported public repository identifiers and how to provide them to the pipeline. The final sample information for all identifiers is obtained from the ENA which provides direct download links for FastQ files as well as their associated md5sums. If a download link exists, the files will be downloaded by FTP otherwise they will be downloaded using [parallel-fastq-dump](https://github.com/rvalieris/parallel-fastq-dump). -**Output files:** +
+ Output files * `preprocess/sra/` * `sra_run_info.tsv`: Run information file for all samples to be downloaded from the ENA/SRA. @@ -56,6 +57,8 @@ Please see the [usage docs](https://github.com/nf-core/viralrecon/blob/master/do > **NB:** Downloaded FastQ files will only be saved in the results directory if the `--save_sra_fastq` parameter is supplied. +
+ ### cat If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage docs](https://github.com/nf-core/viralrecon/blob/dev/docs/usage.md#format) to see how to specify these samples in the input samplesheet. @@ -64,22 +67,28 @@ If multiple libraries/runs have been provided for the same sample in the input s [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -**Output files:** +![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) + +
+ Output files * `preprocess/fastqc/` * `*_fastqc.html`: FastQC report containing quality metrics. * `preprocess/fastqc/zips/` * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. -![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) - > **NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `preprocess/fastp/fastqc/` directory. +
+ ### fastp [fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. -**Output files:** +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + +
+ Output files * `preprocess/fastp/` * `*.fastp.html`: Trimming report in html format. @@ -93,10 +102,10 @@ If multiple libraries/runs have been provided for the same sample in the input s * `preprocess/fastp/fastqc/zips/` * `*.trim_fastqc.zip`: Zip archive containing the FastQC report. -![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) - > **NB:** Post-trimmed FastQ files will only be saved in the results directory if the `--save_trimmed` parameter is supplied. +
+ ## Variant calling A file called `summary_variants_metrics_mqc.tsv` containing a selection of read and variant calling metrics will be saved in the `variants/` results directory. The same metrics have also been added to the top of the MultiQC report. @@ -105,20 +114,26 @@ A file called `summary_variants_metrics_mqc.tsv` containing a selection of read [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. -**Output files:** +![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) + +
+ Output files * `variants/bam/` * `.bam`: Original BAM file created by Bowtie 2. Only present if `--save_align_intermeds` parameter is supplied. * `variants/bam/log/` * `.bowtie2.log`: Bowtie 2 mapping log file. -![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) +
### SAMtools Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. -**Output files:** +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +
+ Output files * `variants/bam/` * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. @@ -126,15 +141,18 @@ Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourcef * `variants/bam/samtools_stats/` * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) - > **NB:** BAM files and their associated indices will only be saved in the results directory if the `--save_align_intermeds` parameter is supplied. +
+ ### iVar trim If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--amplicon_bed` to soft clip primer sequences from a coordinate sorted BAM file. -**Output files:** +![MultiQC - iVar trim primer heatmap](images/mqc_ivar_trim_plot.png) + +
+ Output files * `variants/bam/` * `.trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. @@ -144,15 +162,18 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa * `variants/bam/log/` * `.trim.ivar.log`: iVar trim log file obtained from stdout. -![MultiQC - iVar trim primer heatmap](images/mqc_ivar_trim_plot.png) - > **NB:** Post-trimmed BAM files and their associated indices will only be saved in the results directory if the `--save_align_intermeds` parameter is supplied. +
+ ### picard MarkDuplicates Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses picard MarkDuplicates to *mark* the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. However, you can also choose to remove any reads identified as duplicates via the `--filter_dups` parameter. -**Output files** +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) + +
+ Output files * `variants/bam/` * `..sorted.bam`: Coordinate sorted BAM file after duplicate marking. @@ -162,26 +183,29 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/bam/picard_metrics/` * `..MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. -![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) - > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. +
+ ### picard CollectMultipleMetrics [picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. -**Output files:** +![MultiQC - Picard whole genome coverage plot](images/mqc_picard_wgs_coverage_plot.png) + +![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) + +
+ Output files * `variants/bam/picard_metrics/` * `..CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format and plotted in `*.pdf` format. * `..CollectWgsMetrics.coverage_metrics`: Coverage metrics file from CollectWgsMetrics. -![MultiQC - Picard whole genome coverage plot](images/mqc_picard_wgs_coverage_plot.png) - -![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) - > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. +
+ ### mosdepth [mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation tool for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. @@ -217,7 +241,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. * `<SAMPLE>.<SUFFIX>.amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. -> **NB:** The value of `<SUFFIX>` in the output file names above will depend on the preceding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `<SUFFIX>` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. +> NB: The value of `<SUFFIX>` in the output file names above will depend on the preceding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `<SUFFIX>` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `<SUFFIX>` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. @@ -229,7 +253,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [BEDTools](https://bedtools.readthedocs.io/en/latest/) is a Swiss Army knife of tools for a wide range of genomics analysis tasks. 
In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. -**Output files:** +![MultiQC - VarScan 2 variants called plot](images/mqc_varscan2_plot.png) + +
+ Output files * `variants/varscan2/` * `.vcf.gz`: Low frequency variants VCF file. @@ -247,16 +274,19 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/bam/mpileup/` * `..mpileup`: mpileup files summarize all the data from aligned reads at a given genomic position. Each row of the mpileup file gives similar information to a single vertical column of reads as visualised in IGV. -![MultiQC - VarScan 2 variants called plot](images/mqc_varscan2_plot.png) - > **NB:** The value of `` in the output file names above is determined by the `--max_allele_freq` parameter (Default: 0.8). > **NB:** Output mpileup files will only be saved in the directory if the `--save_mpileup` parameter is supplied. The naming convention for these files will depend on the preceeding steps that were run in the pipeline as described in the paragraph explaining the value of `` in the section above. +
+ ### iVar variants and iVar consensus [iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. -**Output files:** +![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) + +
+ Output files * `variants/ivar/` * `.tsv`: Low frequency variants in TSV format. @@ -274,13 +304,16 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. * `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file. -![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) +
### BCFTools and BEDTools [BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). In contrast to VarScan 2 and iVar, the original variant calls obtained by BCFTools are not filtered further by a higher allele frequency. It seems that the default calls obtained by BCFTools appear to be comparable with the high frequency variants generated by VarScan 2 and iVar. -**Output files:** +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +
+ Output files * `variants/bcftools/` * `.vcf.gz`: Variants VCF file. @@ -291,7 +324,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/bcftools/bcftools_stats/` * `.bcftools_stats.txt`: Statistics and counts obtained from VCF file. -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) +
### SnpEff and SnpSift @@ -299,7 +332,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. -**Output files:** +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +
+ Output files * `variants//snpeff/` * `*.snpEff.csv`: Variant annotation csv file. @@ -309,21 +345,24 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `*.snpEff.vcf.gz.tbi`: Index for VCF file with variant annotations. * `*.snpSift.table.txt`: SnpSift summary table. -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) - > **NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'varscan2,ivar,bcftools'). If applicable, you will have two sets of files where the file name prefix will be `` for low-frequency variants and `.AF` for high frequency variants. +
+ ### QUAST [QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -**Output files:** +
+ Output files * `variants//quast/AF/` * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. > **NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'varscan2,ivar,bcftools') and the value of `` is determined by the `--max_allele_freq` parameter (Default: 0.8). +
+ ## De novo assembly A file called `summary_assembly_metrics_mqc.tsv` containing a selection of read and *de novo* assembly related metrics will be saved in the `assembly/` results directory. The same metrics have also been added to the top of the MultiQC report. @@ -332,7 +371,10 @@ A file called `summary_assembly_metrics_mqc.tsv` containing a selection of read In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. -**Output files:** +![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) + +
+ Output files * `assembly/cutadapt/` * `*.ptrim.fastq.gz`: FastQ files after primer sequence trimming. @@ -343,34 +385,38 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri * `assembly/cutadapt/fastqc/zips/` * `*.ptrim_fastqc.zip`: Zip archive containing the FastQC report. -![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) - > **NB:** Trimmed FastQ files will only be saved in the results directory if the `--save_trimmed` parameter is supplied. +
+ ### Kraken 2 [Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. We used a Kraken 2 database in this workflow to filter out reads specific to the host genome. The remainder of the reads are then passed to numerous *de novo* assembly algorithms in order to reconstruct the viral genome. -**Output files:** +![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) + +
+ Output files * `assembly/kraken2/` * `*.host*.fastq.gz`: Reads that were classified to the host database. * `*.viral*.fastq.gz`: Reads that were unclassified to the host database. * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. -![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) - > **NB:** Output FastQ files will only be saved in the results directory if the `--save_kraken2_fastq` parameter is supplied. +
+ ### SPAdes [SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. [Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. -**Output files:** +
+ Output files * `assembly/spades/` * `*.scaffolds.fa`: SPAdes scaffold assembly. @@ -379,11 +425,14 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. +
+ ### metaSPAdes [metaSPAdes](http://cab.spbu.ru/software/meta-spades/) is a de Bruijn graph-based assembler that is distributed with SPAdes and executed via the `--meta` option. It can be used for the simultaneous reconstruction of multiple genomes as observed in metagenomics data. -**Output files:** +
+ Output files * `assembly/metaspades/` * `*.scaffolds.fa`: metaSPAdes scaffold assembly. @@ -392,11 +441,14 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the * `*.png`: Bandage visualisation for metaSPAdes assembly graph in PNG format. * `*.svg`: Bandage visualisation for metaSPAdes assembly graph in SVG format. +
+ ### Unicycler [Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. -**Output files:** +
+ Output files * `assembly/unicycler/` * `*.scaffolds.fa`: Unicycler scaffold assembly. @@ -405,22 +457,28 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. +
+ ### minia [Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. -**Output files:** +
+ Output files * `assembly/minia/<MINIA_KMER>/` * `*.scaffolds.fa`: Minia scaffold assembly. > **NB:** The value of `<MINIA_KMER>` in the output directory name above is determined by the `--minia_kmer` parameter (Default: 31). +
+ ### BLAST [blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. -**Output files:** +
+ Output files * `assembly/<ASSEMBLER>/blast/` * `*.blast.txt`: BLAST results against the target virus. @@ -428,11 +486,14 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the > **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ### ABACAS [ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. -**Output files:** +
+ Output files * `assembly/<ASSEMBLER>/abacas/` * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. @@ -447,11 +508,14 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the > **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ### PlasmidID [PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). -**Output files:** +
+ Output files * `assembly//plasmidid//` * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. @@ -462,19 +526,24 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ### Assembly QUAST [QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -**Output files:** +![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) + +
+ Output files * `assembly/<ASSEMBLER>/quast/` * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. -![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) - > **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ### Minimap2, seqwish, vg [Minimap2](https://github.com/lh3/minimap2) is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. Minimap2 was used to generate all-versus-all alignments between scaffold assembly contigs and the reference genome. @@ -485,7 +554,8 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [Bandage](https://github.com/rrwick/Bandage), a Bioinformatics Application for Navigating De novo Assembly Graphs Easily, is a GUI program that allows users to interact with the assembly graphs made by de novo assemblers and other graphs in GFA format. Bandage was used to render induced genome variation graphs as static PNG and SVG images. -**Output files:** +
+ Output files * `assembly/<ASSEMBLER>/variants/` * `*.gfa`: Induced genome variation graph. @@ -499,13 +569,16 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the > **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ### Assembly SnpEff and SnpSift [SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). [SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. -**Output files:** +
+ Output files * `assembly/<ASSEMBLER>/variants/snpeff/` * `*.snpEff.csv`: Variant annotation csv file. @@ -517,6 +590,8 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the > **NB:** The value of `<ASSEMBLER>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). +
+ ## Workflow reporting and genomes ### MultiQC @@ -531,18 +606,22 @@ The pipeline has special steps which also allow the software versions to be repo Please click [here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html) to see an example MultiQC report generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config) to run the pipeline on [samples](https://zenodo.org/record/3735111) which were prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. -**Output files:** +
+ Output files * `multiqc/` * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. * `multiqc_plots/`: directory containing static images from the report in various formats. +
+ ### Reference genome files A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. -**Output files:** +
+ Output files * `genome/` * `BlastDB/`: BLAST database for viral genome. @@ -553,14 +632,19 @@ A number of genome-specific files are generated by the pipeline because they are * Unzipped genome fasta file for viral genome * Unzipped genome annotation GFF file for viral genome +
+ ### Pipeline information [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. -**Output files:** +
+ Output files * `pipeline_info/` * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. * Documentation for interpretation of results in HTML format: `results_description.html`. + +
From b1d63d9ba70aa41433707695589b1b3d58f6e8df Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 14:25:19 +0100 Subject: [PATCH 061/129] Add html lists everywhere...yippeeee --- docs/output.md | 400 ++++++++++++++++++++++++------------------------- 1 file changed, 200 insertions(+), 200 deletions(-) diff --git a/docs/output.md b/docs/output.md index 9876a4b9..ac61fafe 100644 --- a/docs/output.md +++ b/docs/output.md @@ -9,34 +9,34 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: * [Preprocessing](#Preprocessing) - * [parallel-fastq-dump](#parallel-fastq-dump) - Download samples from SRA - * [cat](#cat) - Merge re-sequenced FastQ files - * [FastQC](#fastqc) - Raw read QC - * [fastp](#fastp) - Adapter and quality trimming + * [parallel-fastq-dump](#parallel-fastq-dump) - Download samples from SRA + * [cat](#cat) - Merge re-sequenced FastQ files + * [FastQC](#fastqc) - Raw read QC + * [fastp](#fastp) - Adapter and quality trimming * [Variant calling](#variant-calling) - * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome - * [SAMtools](#samtools) - Sort, index and generate metrics for alignments - * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data - * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal - * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Whole genome coverage and alignment metrics - * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics - * [VarScan 2, BCFTools, BEDTools](#varscan-2-bcftools-bedtools) *||* [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation - * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction - * 
[QUAST](#quast) - Consensus assessment report + * [Bowtie 2](#bowtie-2) - Read alignment relative to reference genome + * [SAMtools](#samtools) - Sort, index and generate metrics for alignments + * [iVar trim](#ivar-trim) - Primer sequence removal for amplicon data + * [picard MarkDuplicates](#picard-markduplicates) - Duplicate read marking and removal + * [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) - Whole genome coverage and alignment metrics + * [mosdepth](#mosdepth) - Whole-genome and amplicon coverage metrics + * [VarScan 2, BCFTools, BEDTools](#varscan-2-bcftools-bedtools) *||* [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation + * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [QUAST](#quast) - Consensus assessment report * [De novo assembly](#de-novo-assembly) - * [Cutadapt](#cutadapt) - Primer trimming for amplicon data - * [Kraken 2](#kraken-2) - Removal of host reads - * [SPAdes](#spades) *||* [metaSPAdes](#metaspades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly - * [BLAST](#blast) - Blast to reference assembly - * [ABACAS](#abacas) - Order contigs according to reference genome - * [PlasmidID](#plasmidid) - Assembly report and visualisation - * [Assembly QUAST](#assembly-quast) - Assembly quality assessment - * [Minimap2, seqwish, vg](#minimap2-seqwish-vg) - Call variants from induced genome variation graph - * [Assembly SnpEff and SnpSift](#assembly-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction + * [Cutadapt](#cutadapt) - Primer trimming for amplicon data + * [Kraken 2](#kraken-2) - Removal of host reads + * [SPAdes](#spades) *||* [metaSPAdes](#metaspades) *||* [Unicycler](#unicycler) *||* [minia](#minia) - Viral genome assembly + * [BLAST](#blast) - Blast to reference assembly + * 
[ABACAS](#abacas) - Order contigs according to reference genome + * [PlasmidID](#plasmidid) - Assembly report and visualisation + * [Assembly QUAST](#assembly-quast) - Assembly quality assessment + * [Minimap2, seqwish, vg](#minimap2-seqwish-vg) - Call variants from induced genome variation graph + * [Assembly SnpEff and SnpSift](#assembly-snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction * [Workflow reporting and genomes](#workflow-reporting-and-genomes) - * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling - * [Reference genome files](#reference-genome-files) - Saving reference genome indices/files - * [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution + * [MultiQC](#multiqc) - Present QC for raw reads, alignment, assembly and variant calling + * [Reference genome files](#reference-genome-files) - Saving reference genome indices/files + * [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution ## Preprocessing @@ -44,16 +44,16 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Please see the [usage docs](https://github.com/nf-core/viralrecon/blob/master/docs/usage.md#supported-public-repository-ids) for a list of supported public repository identifiers and how to provide them to the pipeline. The final sample information for all identifiers is obtained from the ENA which provides direct download links for FastQ files as well as their associated md5sums. If a download link exists, the files will be downloaded by FTP otherwise they will be downloaded using [parallel-fastq-dump](https://github.com/rvalieris/parallel-fastq-dump). -
+
Output files * `preprocess/sra/` - * `sra_run_info.tsv`: Run information file for all samples to be downloaded from the ENA/SRA. - * `*.fastq.gz`: Paired-end/single-end reads downloaded and extracted from the ENA/SRA. + * `sra_run_info.tsv`: Run information file for all samples to be downloaded from the ENA/SRA. + * `*.fastq.gz`: Paired-end/single-end reads downloaded and extracted from the ENA/SRA. * `preprocess/sra/md5/` - * `*.md5`: Files containing `md5` sum for FastQ files downloaded from ENA/SRA. + * `*.md5`: Files containing `md5` sum for FastQ files downloaded from ENA/SRA. * `preprocess/sra/log/` - * `*.fastq_dump.log`: Log file generated from stdout whilst running `parallel-fastq-dump`. + * `*.fastq_dump.log`: Log file generated from stdout whilst running `parallel-fastq-dump`. > **NB:** Downloaded FastQ files will only be saved in the results directory if the `--save_sra_fastq` parameter is supplied. @@ -69,13 +69,13 @@ If multiple libraries/runs have been provided for the same sample in the input s ![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) -
+
Output files * `preprocess/fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics. + * `*_fastqc.html`: FastQC report containing quality metrics. * `preprocess/fastqc/zips/` - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. > **NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `preprocess/fastp/fastqc/` directory. @@ -87,20 +87,20 @@ If multiple libraries/runs have been provided for the same sample in the input s ![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) -
+
Output files * `preprocess/fastp/` - * `*.fastp.html`: Trimming report in html format. - * `*.fastp.json`: Trimming report in json format. - * `*.trim.fastq.gz`: Paired-end/single-end trimmed reads. - * `*.trim.fail.gz`: Unpaired trimmed reads (only for paired-end data). + * `*.fastp.html`: Trimming report in html format. + * `*.fastp.json`: Trimming report in json format. + * `*.trim.fastq.gz`: Paired-end/single-end trimmed reads. + * `*.trim.fail.gz`: Unpaired trimmed reads (only for paired-end data). * `preprocess/fastp/log/` - * `*.fastp.log`: Trimming log file. + * `*.fastp.log`: Trimming log file. * `preprocess/fastp/fastqc/`: - * `*.trim_fastqc.html`: FastQC report of the trimmed reads. + * `*.trim_fastqc.html`: FastQC report of the trimmed reads. * `preprocess/fastp/fastqc/zips/` - * `*.trim_fastqc.zip`: Zip archive containing the FastQC report. + * `*.trim_fastqc.zip`: Zip archive containing the FastQC report. > **NB:** Post-trimmed FastQ files will only be saved in the results directory if the `--save_trimmed` parameter is supplied. @@ -116,13 +116,13 @@ A file called `summary_variants_metrics_mqc.tsv` containing a selection of read ![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) -
+
Output files * `variants/bam/` - * `.bam`: Original BAM file created by Bowtie 2. Only present if `--save_align_intermeds` parameter is supplied. + * `.bam`: Original BAM file created by Bowtie 2. Only present if `--save_align_intermeds` parameter is supplied. * `variants/bam/log/` - * `.bowtie2.log`: Bowtie 2 mapping log file. + * `.bowtie2.log`: Bowtie 2 mapping log file.
@@ -132,14 +132,14 @@ Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourcef ![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) -
+
Output files * `variants/bam/` - * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. - * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. + * `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + * `.sorted.bam.bai`: Index file for coordinate sorted BAM file. * `variants/bam/samtools_stats/` - * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. + * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. > **NB:** BAM files and their associated indices will only be saved in the results directory if the `--save_align_intermeds` parameter is supplied. @@ -151,16 +151,16 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa ![MultiQC - iVar trim primer heatmap](images/mqc_ivar_trim_plot.png) -
+
Output files * `variants/bam/` - * `.trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. - * `.trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. + * `.trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + * `.trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. * `variants/bam/samtools_stats/` - * SAMtools `.trim.flagstat`, `.trim.idxstats` and `.trim.stats` files generated from the primer trimmed alignment files. + * SAMtools `.trim.flagstat`, `.trim.idxstats` and `.trim.stats` files generated from the primer trimmed alignment files. * `variants/bam/log/` - * `.trim.ivar.log`: iVar trim log file obtained from stdout. + * `.trim.ivar.log`: iVar trim log file obtained from stdout. > **NB:** Post-trimmed BAM files and their associated indices will only be saved in the results directory if the `--save_align_intermeds` parameter is supplied. @@ -172,16 +172,16 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) -
+
Output files * `variants/bam/` - * `..sorted.bam`: Coordinate sorted BAM file after duplicate marking. - * `..sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. + * `..sorted.bam`: Coordinate sorted BAM file after duplicate marking. + * `..sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. * `variants/bam/samtools_stats/` - * SAMtools `..flagstat`, `..idxstats` and `..stats` files generated from the duplicate marked alignment files. + * SAMtools `..flagstat`, `..idxstats` and `..stats` files generated from the duplicate marked alignment files. * `variants/bam/picard_metrics/` - * `..MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. + * `..MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. @@ -195,12 +195,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) -
+
Output files * `variants/bam/picard_metrics/` - * `..CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format and plotted in `*.pdf` format. - * `..CollectWgsMetrics.coverage_metrics`: Coverage metrics file from CollectWgsMetrics. + * `..CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format and plotted in `*.pdf` format. + * `..CollectWgsMetrics.coverage_metrics`: Coverage metrics file from CollectWgsMetrics. > **NB:** The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. @@ -210,36 +210,36 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. -
+
Output files * `variants/bam/mosdepth/genome/` - * `..genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `..genome.per-base.bed.gz`: Per-base depth output genome-wide. - * `..genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `..genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. - * `..genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..genome.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..genome.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..genome.per-base.bed.gz`: Per-base depth output genome-wide. + * `..genome.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..genome.regions.bed.gz`: Mean regional depth for 200bp windows genome-wide. + * `..genome.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. * `variants/bam/mosdepth/genome/plots/` - * `all_samples..genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. - * `..genome.regions.coverage.pdf`: Whole-genome coverage plot. - * `..genome.regions.coverage.tsv`: File containing coverage values for the above plot. 
+ * `all_samples..genome.regions.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + * `..genome.regions.coverage.pdf`: Whole-genome coverage plot. + * `..genome.regions.coverage.tsv`: File containing coverage values for the above plot. * `variants/bam/mosdepth/amplicon/` - * `..amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. - * `..amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. - * `..amplicon.per-base.bed.gz`: Per-base depth output genome-wide. - * `..amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `..amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. - * `..amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. - * `..amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. - * `..amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.mosdepth.global.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.region.dist.txt`: A distribution of proportion of bases covered at or above a given threshold for each chromosome and genome-wide. + * `..amplicon.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + * `..amplicon.per-base.bed.gz`: Per-base depth output genome-wide. + * `..amplicon.per-base.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.regions.bed.gz`: Mean regional depth for individual amplicons genome-wide. 
+ * `..amplicon.regions.bed.gz.csi`: CSI index that can be used for tabix queries from above file. + * `..amplicon.thresholds.bed.gz`: Threshold output to indicate how many bases in each region are covered at given thresholds. + * `..amplicon.thresholds.bed.gz.csi`: CSI index that can be used for tabix queries from above file. * `variants/bam/mosdepth/amplicon/plots/` - * `all_samples..amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. - * `all_samples..amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. - * `..amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. - * `..amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + * `all_samples..amplicon.regions.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + * `all_samples..amplicon.regions.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + * `..amplicon.regions.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + * `..amplicon.regions.coverage.tsv`: File containing per-amplicon coverage values for the above plot. > NB: The value of `` in the output file names above will depend on the preceeding steps that were run in the pipeline. If `--protocol amplicon` is specified then this process will be run on the iVar trimmed alignments and the value of `` will be `trim.mkD`. However, if `--protocol metagenomic` is specified then the process will be run on the alignments obtained directly from Bowtie 2 and the value of `` will be `mkD`; where `mkD` is an abbreviation for MarkDuplicates. @@ -255,24 +255,24 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - VarScan 2 variants called plot](images/mqc_varscan2_plot.png) -
+
Output files * `variants/varscan2/` - * `.vcf.gz`: Low frequency variants VCF file. - * `.vcf.gz.tbi`: Low frequency variants VCF index file. - * `.AF.vcf.gz`: High frequency variants VCF file. - * `.AF.vcf.gz.tbi`: High frequency variants VCF index file. + * `.vcf.gz`: Low frequency variants VCF file. + * `.vcf.gz.tbi`: Low frequency variants VCF index file. + * `.AF.vcf.gz`: High frequency variants VCF file. + * `.AF.vcf.gz.tbi`: High frequency variants VCF index file. * `variants/varscan2/consensus/` - * `.AF.consensus.fa`: Consensus Fasta file generated by integrating the high frequency variants called by VarScan into the reference genome. - * `.AF.consensus.masked.fa`: Masked consensus Fasta file. + * `.AF.consensus.fa`: Consensus Fasta file generated by integrating the high frequency variants called by VarScan into the reference genome. + * `.AF.consensus.masked.fa`: Masked consensus Fasta file. * `variants/varscan2/log/` - * `.varscan2.log`: Log file generated from stderr by VarScan 2. + * `.varscan2.log`: Log file generated from stderr by VarScan 2. * `variants/varscan2/bcftools_stats/` - * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. - * `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file. + * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. + * `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file. * `variants/bam/mpileup/` - * `..mpileup`: mpileup files summarize all the data from aligned reads at a given genomic position. Each row of the mpileup file gives similar information to a single vertical column of reads as visualised in IGV. + * `..mpileup`: mpileup files summarize all the data from aligned reads at a given genomic position. Each row of the mpileup file gives similar information to a single vertical column of reads as visualised in IGV. 
> **NB:** The value of `` in the output file names above is determined by the `--max_allele_freq` parameter (Default: 0.8). > **NB:** Output mpileup files will only be saved in the directory if the `--save_mpileup` parameter is supplied. The naming convention for these files will depend on the preceeding steps that were run in the pipeline as described in the paragraph explaining the value of `` in the section above. @@ -285,24 +285,24 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) -
+
Output files * `variants/ivar/` - * `.tsv`: Low frequency variants in TSV format. - * `.vcf.gz`: Low frequency variants VCF file. - * `.vcf.gz.tbi`: Low frequency variants VCF index file. - * `.AF.vcf.gz`: High frequency variants VCF file. - * `.AF.vcf.gz.tbi`: High frequency variants VCF index file. + * `.tsv`: Low frequency variants in TSV format. + * `.vcf.gz`: Low frequency variants VCF file. + * `.vcf.gz.tbi`: Low frequency variants VCF index file. + * `.AF.vcf.gz`: High frequency variants VCF file. + * `.AF.vcf.gz.tbi`: High frequency variants VCF index file. * `variants/ivar/consensus/` - * `.AF.consensus.fa`: Consensus Fasta file generated by iVar at the frequency threshold set by the `--max_allele_freq` parameter. - * `.AF.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. + * `.AF.consensus.fa`: Consensus Fasta file generated by iVar at the frequency threshold set by the `--max_allele_freq` parameter. + * `.AF.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. * `variants/ivar/log/` - * `.variant.counts.log`: Variant counts for low frequency variants. - * `.AF.variant.counts.log`: Variant counts for high frequency variants. + * `.variant.counts.log`: Variant counts for low frequency variants. + * `.AF.variant.counts.log`: Variant counts for high frequency variants. * `variants/ivar/bcftools_stats/` - * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. - * `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file. + * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. + * `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file.
@@ -312,17 +312,17 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) -
+
Output files * `variants/bcftools/` - * `.vcf.gz`: Variants VCF file. - * `.vcf.gz.tbi`: Variants VCF index file. + * `.vcf.gz`: Variants VCF file. + * `.vcf.gz.tbi`: Variants VCF index file. * `variants/bcftools/consensus/` - * `.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. - * `.consensus.masked.fa`: Masked consensus Fasta file. + * `.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. + * `.consensus.masked.fa`: Masked consensus Fasta file. * `variants/bcftools/bcftools_stats/` - * `.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + * `.bcftools_stats.txt`: Statistics and counts obtained from VCF file.
@@ -334,16 +334,16 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) -
+
Output files * `variants//snpeff/` - * `*.snpEff.csv`: Variant annotation csv file. - * `*.snpEff.genes.txt`: Gene table for annotated variants. - * `*.snpEff.summary.html`: Summary html file for variants. - * `*.snpEff.vcf.gz`: VCF file with variant annotations. - * `*.snpEff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpSift.table.txt`: SnpSift summary table. + * `*.snpEff.csv`: Variant annotation csv file. + * `*.snpEff.genes.txt`: Gene table for annotated variants. + * `*.snpEff.summary.html`: Summary html file for variants. + * `*.snpEff.vcf.gz`: VCF file with variant annotations. + * `*.snpEff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpSift.table.txt`: SnpSift summary table. > **NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'varscan2,ivar,bcftools'). If applicable, you will have two sets of files where the file name prefix will be `` for low-frequency variants and `.AF` for high frequency variants. @@ -353,11 +353,11 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -
+
Output files * `variants//quast/AF/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. > **NB:** The value of `` in the output directory name above is determined by the `--callers` parameter (Default: 'varscan2,ivar,bcftools') and the value of `` is determined by the `--max_allele_freq` parameter (Default: 0.8). @@ -373,17 +373,17 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri ![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) -
+
Output files * `assembly/cutadapt/` - * `*.ptrim.fastq.gz`: FastQ files after primer sequence trimming. + * `*.ptrim.fastq.gz`: FastQ files after primer sequence trimming. * `assembly/cutadapt/log/` - * `*.cutadapt.log`: Cutadapt log file generated from stdout. + * `*.cutadapt.log`: Cutadapt log file generated from stdout. * `assembly/cutadapt/fastqc/` - * `*.ptrim_fastqc.html`: FastQC report of the trimmed reads. + * `*.ptrim_fastqc.html`: FastQC report of the trimmed reads. * `assembly/cutadapt/fastqc/zips/` - * `*.ptrim_fastqc.zip`: Zip archive containing the FastQC report. + * `*.ptrim_fastqc.zip`: Zip archive containing the FastQC report. > **NB:** Trimmed FastQ files will only be saved in the results directory if the `--save_trimmed` parameter is supplied. @@ -397,13 +397,13 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the ![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) -
+
Output files * `assembly/kraken2/` - * `*.host*.fastq.gz`: Reads that were classified to the host database. - * `*.viral*.fastq.gz`: Reads that were unclassified to the host database. - * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. + * `*.host*.fastq.gz`: Reads that were classified to the host database. + * `*.viral*.fastq.gz`: Reads that were unclassified to the host database. + * `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. > **NB:** Output FastQ files will only be saved in the results directory if the `--save_kraken2_fastq` parameter is supplied. @@ -415,15 +415,15 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [Bandage](https://rrwick.github.io/Bandage/) is a program for visualising *de novo* assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing *de novo* assemblies. -
+
Output files * `assembly/spades/` - * `*.scaffolds.fa`: SPAdes scaffold assembly. - * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. + * `*.scaffolds.fa`: SPAdes scaffold assembly. + * `*.assembly.gfa`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. * `assembly/spades/bandage/` - * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. + * `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + * `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format.
@@ -431,15 +431,15 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [metaSPAdes](http://cab.spbu.ru/software/meta-spades/) is a de Bruijn graph-based assembler that is distributed with SPAdes and executed via the `--meta` option. It can be used for the simultaneous reconstruction of multiple genomes as observed in metagenomics data. -
+
Output files * `assembly/metaspades/` - * `*.scaffolds.fa`: metaSPAdes scaffold assembly. - * `*.assembly.gfa`: metaSPAdes assembly graph in GFA format. + * `*.scaffolds.fa`: metaSPAdes scaffold assembly. + * `*.assembly.gfa`: metaSPAdes assembly graph in GFA format. * `assembly/metaspades/bandage/` - * `*.png`: Bandage visualisation for metaSPAdes assembly graph in PNG format. - * `*.svg`: Bandage visualisation for metaSPAdes assembly graph in SVG format. + * `*.png`: Bandage visualisation for metaSPAdes assembly graph in PNG format. + * `*.svg`: Bandage visualisation for metaSPAdes assembly graph in SVG format.
@@ -447,15 +447,15 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. -
+
Output files * `assembly/unicycler/` - * `*.scaffolds.fa`: Unicycler scaffold assembly. - * `*.assembly.gfa`: Unicycler assembly graph in GFA format. + * `*.scaffolds.fa`: Unicycler scaffold assembly. + * `*.assembly.gfa`: Unicycler assembly graph in GFA format. * `assembly/unicycler/bandage/` - * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. - * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. + * `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + * `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format.
@@ -463,11 +463,11 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. -
+
Output files * `assembly/minia//` - * `*.scaffolds.fa`: Minia scaffold assembly. + * `*.scaffolds.fa`: Minia scaffold assembly. > **NB:** The value of `` in the output directory name above is determined by the `--minia_kmer` parameter (Default: 31). @@ -477,12 +477,12 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. -
+
Output files * `assembly//blast/` - * `*.blast.txt`: BLAST results against the target virus. - * `*.blast.filt.header.txt`: Filtered BLAST results. + * `*.blast.txt`: BLAST results against the target virus. + * `*.blast.filt.header.txt`: Filtered BLAST results. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -492,18 +492,18 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. -
+
Output files * `assembly//abacas/` - * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. - * `*.abacas.crunch`: Comparison file. - * `*.abacas.fasta`: Ordered and orientated sequence file. - * `*.abacas.gaps`: Gap information. - * `*.abacas.gaps.tab`: Gap information in tab-delimited format. - * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. - * `*.abacas.tab`: Feature file - * `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. + * `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + * `*.abacas.crunch`: Comparison file. + * `*.abacas.fasta`: Ordered and orientated sequence file. + * `*.abacas.gaps`: Gap information. + * `*.abacas.gaps.tab`: Gap information in tab-delimited format. + * `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + * `*.abacas.tab`: Feature file + * `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. * `assembly//abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -514,15 +514,15 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). -
+
Output files * `assembly//plasmidid//` - * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. - * `data/`: Files used for drawing the circos images. - * `database/`: Annotation files used for drawing the circos images. - * `fasta_files`: Folder with fasta files that correspond to the selection of contigs/scaffolds required to reconstruct the reference genome generated in the `images/` folder. - * `log/`: Log files. + * `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + * `data/`: Files used for drawing the circos images. + * `database/`: Annotation files used for drawing the circos images. + * `fasta_files`: Folder with fasta files that correspond to the selection of contigs/scaffolds required to reconstruct the reference genome generated in the `images/` folder. + * `log/`: Log files. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -534,11 +534,11 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the ![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) -
+
Output files * `assembly//quast/` - * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + * `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -554,18 +554,18 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [Bandage](https://github.com/rrwick/Bandage), a Bioinformatics Application for Navigating De novo Assembly Graphs Easily, is a GUI program that allows users to interact with the assembly graphs made by de novo assemblers and other graphs in GFA format. Bandage was used to render induced genome variation graphs as static PNG and SVG images. -
+
Output files * `assembly//variants/` - * `*.gfa`: Induced genome variation graph. - * `*.vcf.gz`: VCF file with variant annotations. - * `*.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.gfa`: Induced genome variation graph. + * `*.vcf.gz`: VCF file with variant annotations. + * `*.vcf.gz.tbi`: Index for VCF file with variant annotations. * `assembly//variants/bcftools_stats/` - * `*.bcftools_stats.txt`: Statistics and counts for variants in VCF files. + * `*.bcftools_stats.txt`: Statistics and counts for variants in VCF files. * `assembly//bandage/` - * `*.png`: Bandage visualisation for induced genome variation graph in PNG format. - * `*.svg`: Bandage visualisation for induced genome variation graph in SVG format. + * `*.png`: Bandage visualisation for induced genome variation graph in PNG format. + * `*.svg`: Bandage visualisation for induced genome variation graph in SVG format. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -577,16 +577,16 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. -
+
Output files * `assembly//variants/snpeff/` - * `*.snpEff.csv`: Variant annotation csv file. - * `*.snpEff.genes.txt`: Gene table for annotated variants. - * `*.snpEff.summary.html`: Summary html file for variants. - * `*.snpEff.vcf.gz`: VCF file with variant annotations. - * `*.snpEff.vcf.gz.tbi`: Index for VCF file with variant annotations. - * `*.snpSift.table.txt`: SnpSift summary table. + * `*.snpEff.csv`: Variant annotation csv file. + * `*.snpEff.genes.txt`: Gene table for annotated variants. + * `*.snpEff.summary.html`: Summary html file for variants. + * `*.snpEff.vcf.gz`: VCF file with variant annotations. + * `*.snpEff.vcf.gz.tbi`: Index for VCF file with variant annotations. + * `*.snpSift.table.txt`: SnpSift summary table. > **NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades,metaspades,unicycler,minia'). @@ -606,13 +606,13 @@ The pipeline has special steps which also allow the software versions to be repo Please click [here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/multiqc_report.html) to see an example MultiQC report generated using the parameters defined in [this configuration file](https://github.com/nf-core/viralrecon/blob/master/conf/test_full.config) to run the pipeline on [samples](https://zenodo.org/record/3735111) which were prepared from the [ncov-2019 ARTIC Network V1 amplicon set](https://artic.network/ncov-2019) and sequenced on the Illumina MiSeq platform in 301bp paired-end format. -
+
Output files * `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: directory containing static images from the report in various formats. + * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + * `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -620,17 +620,17 @@ Please click [here](https://raw.githack.com/nf-core/viralrecon/master/docs/html/ A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. -
+
Output files * `genome/` - * `BlastDB/`: BLAST database for viral genome. - * `Bowtie2Index/`: Bowtie 2 index for viral genome. - * `kraken2_/`: Kraken 2 database for host genome. - * `SnpEffDB/`: SnpEff database for viral genome. - * `snpeff.config`: SnpEff config file for viral genome. - * Unzipped genome fasta file for viral genome - * Unzipped genome annotation GFF file for viral genome + * `BlastDB/`: BLAST database for viral genome. + * `Bowtie2Index/`: Bowtie 2 index for viral genome. + * `kraken2_/`: Kraken 2 database for host genome. + * `SnpEffDB/`: SnpEff database for viral genome. + * `snpeff.config`: SnpEff config file for viral genome. + * Unzipped genome fasta file for viral genome + * Unzipped genome annotation GFF file for viral genome
@@ -638,13 +638,13 @@ A number of genome-specific files are generated by the pipeline because they are [Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. -
+
Output files * `pipeline_info/` - * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. - * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. - * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. - * Documentation for interpretation of results in HTML format: `results_description.html`. + * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.csv`. + * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`. + * Documentation for interpretation of results in HTML format: `results_description.html`.
From 60007d8dded44678b6cf1d04d686a8d2f1da3484 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 14:43:27 +0100 Subject: [PATCH 062/129] Update markdownlint --- .github/markdownlint.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index e84d9242..7c7628c8 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -5,3 +5,5 @@ no-duplicate-header: siblings_only: true MD033: allowed_elements: [details, summary] +MD007: + indent: 4 From 651f9bcd63d65e1a10cd4fc624ad17e03385df5c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 14:47:36 +0100 Subject: [PATCH 063/129] Make everything 4 space tabs..argggghhhh --- CHANGELOG.md | 8 +-- docs/usage.md | 162 +++++++++++++++++++++++++------------------------- 2 files changed, 85 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc73f843..5a782858 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,14 +10,14 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: - * `--skip_mosdepth` to skip genome-wide and amplicon coverage plot generation from mosdepth output - * `--amplicon_left_suffix` to provide left primer suffix used in name field of `--amplicon_bed` - * `--amplicon_right_suffix` to provide right primer suffix used in name field of `--amplicon_bed` + * `--skip_mosdepth` to skip genome-wide and amplicon coverage plot generation from mosdepth output + * `--amplicon_left_suffix` to provide left primer suffix used in name field of `--amplicon_bed` + * `--amplicon_right_suffix` to provide right primer suffix used in name field of `--amplicon_bed` ### `Removed` * Parameters: - * `--skip_qc` + * `--skip_qc` ### `Dependencies` diff --git a/docs/usage.md b/docs/usage.md index 56c8b10c..7fc6c8ed 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -5,89 +5,89 @@ * [Table of contents](#table-of-contents) * [Introduction](#introduction) * [Running the pipeline](#running-the-pipeline) - * [Updating the pipeline](#updating-the-pipeline) - * [Reproducibility](#reproducibility) + * [Updating the pipeline](#updating-the-pipeline) + * [Reproducibility](#reproducibility) * [Main arguments](#main-arguments) - * [`-profile`](#-profile) - * [`--input`](#--input) - * [`--protocol`](#--protocol) - * [`--amplicon_bed`](#--amplicon_bed) - * [`--amplicon_fasta`](#--amplicon_fasta) + * [`-profile`](#-profile) + * [`--input`](#--input) + * [`--protocol`](#--protocol) + * [`--amplicon_bed`](#--amplicon_bed) + * [`--amplicon_fasta`](#--amplicon_fasta) * [SRA download](#sra-download) - * [`--save_sra_fastq`](#--save_sra_fastq) - * [`--skip_sra`](#--skip_sra) + * [`--save_sra_fastq`](#--save_sra_fastq) + * [`--skip_sra`](#--skip_sra) * [Reference genomes](#reference-genomes) - * [`--genome`](#--genome) - * 
[`--fasta`](#--fasta) - * [`--gff`](#--gff) - * [`--save_reference`](#--save_reference) + * [`--genome`](#--genome) + * [`--fasta`](#--fasta) + * [`--gff`](#--gff) + * [`--save_reference`](#--save_reference) * [Read trimming](#read-trimming) - * [`--cut_mean_quality`](#--cut_mean_quality) - * [`--qualified_quality_phred`](#--qualified_quality_phred) - * [`--unqualified_percent_limit`](#--unqualified_percent_limit) - * [`--min_trim_length`](#--min_trim_length) - * [`--skip_adapter_trimming`](#--skip_adapter_trimming) - * [`--skip_amplicon_trimming`](#--skip_amplicon_trimming) - * [`--save_trimmed`](#--save_trimmed) + * [`--cut_mean_quality`](#--cut_mean_quality) + * [`--qualified_quality_phred`](#--qualified_quality_phred) + * [`--unqualified_percent_limit`](#--unqualified_percent_limit) + * [`--min_trim_length`](#--min_trim_length) + * [`--skip_adapter_trimming`](#--skip_adapter_trimming) + * [`--skip_amplicon_trimming`](#--skip_amplicon_trimming) + * [`--save_trimmed`](#--save_trimmed) * [Kraken 2](#kraken-2) - * [`--kraken2_db`](#--kraken2_db) - * [`--kraken2_db_name`](#--kraken2_db_name) - * [`--kraken2_use_ftp`](#--kraken2_use_ftp) - * [`--save_kraken2_fastq`](#--save_kraken2_fastq) - * [`--skip_kraken2`](#--skip_kraken2) + * [`--kraken2_db`](#--kraken2_db) + * [`--kraken2_db_name`](#--kraken2_db_name) + * [`--kraken2_use_ftp`](#--kraken2_use_ftp) + * [`--save_kraken2_fastq`](#--save_kraken2_fastq) + * [`--skip_kraken2`](#--skip_kraken2) * [Variant calling](#variant-calling) - * [`--callers`](#-callers) - * [`--ivar_exclude_reads`](#--ivar_exclude_reads) - * [`--filter_dups`](#--filter_dups) - * [`--filter_unmapped`](#--filter_unmapped) - * [`--min_base_qual`](#--min_base_qual) - * [`--max_allele_freq`](#--max_allele_freq) - * [`--amplicon_left_suffix`](#--amplicon_left_suffix) - * [`--amplicon_right_suffix`](#--amplicon_right_suffix) - * [`--min_coverage`](#--min_coverage) - * [`--save_align_intermeds`](#--save_align_intermeds) - * 
[`--save_mpileup`](#--save_mpileup) - * [`--skip_markduplicates`](#--skip_markduplicates) - * [`--skip_picard_metrics`](#--skip_picard_metrics) - * [`--skip_mosdepth`](#--skip_mosdepth) - * [`--skip_snpeff`](#--skip_snpeff) - * [`--skip_variants_quast`](#--skip_variants_quast) - * [`--skip_variants`](#--skip_variants) + * [`--callers`](#-callers) + * [`--ivar_exclude_reads`](#--ivar_exclude_reads) + * [`--filter_dups`](#--filter_dups) + * [`--filter_unmapped`](#--filter_unmapped) + * [`--min_base_qual`](#--min_base_qual) + * [`--max_allele_freq`](#--max_allele_freq) + * [`--amplicon_left_suffix`](#--amplicon_left_suffix) + * [`--amplicon_right_suffix`](#--amplicon_right_suffix) + * [`--min_coverage`](#--min_coverage) + * [`--save_align_intermeds`](#--save_align_intermeds) + * [`--save_mpileup`](#--save_mpileup) + * [`--skip_markduplicates`](#--skip_markduplicates) + * [`--skip_picard_metrics`](#--skip_picard_metrics) + * [`--skip_mosdepth`](#--skip_mosdepth) + * [`--skip_snpeff`](#--skip_snpeff) + * [`--skip_variants_quast`](#--skip_variants_quast) + * [`--skip_variants`](#--skip_variants) * [De novo assembly](#de-novo-assembly) - * [`--assemblers`](#--assemblers) - * [`--minia_kmer`](#--minia_kmer) - * [`--skip_blast`](#--skip_blast) - * [`--skip_abacas`](#--skip_abacas) - * [`--skip_plasmidid`](#--skip_plasmidid) - * [`--skip_vg`](#--skip_vg) - * [`--skip_assembly_quast`](#--skip_assembly_quast) - * [`--skip_assembly`](#--skip_assembly) + * [`--assemblers`](#--assemblers) + * [`--minia_kmer`](#--minia_kmer) + * [`--skip_blast`](#--skip_blast) + * [`--skip_abacas`](#--skip_abacas) + * [`--skip_plasmidid`](#--skip_plasmidid) + * [`--skip_vg`](#--skip_vg) + * [`--skip_assembly_quast`](#--skip_assembly_quast) + * [`--skip_assembly`](#--skip_assembly) * [Skipping QC steps](#skipping-qc-steps) - * `--skip_fastqc` - * `--skip_multiqc` + * `--skip_fastqc` + * `--skip_multiqc` * [Job resources](#job-resources) - * [Automatic resubmission](#automatic-resubmission) - * 
[Custom resource requests](#custom-resource-requests) + * [Automatic resubmission](#automatic-resubmission) + * [Custom resource requests](#custom-resource-requests) * [AWS Batch specific parameters](#aws-batch-specific-parameters) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) - * [`--awscli`](#--awscli) + * [`--awsqueue`](#--awsqueue) + * [`--awsregion`](#--awsregion) + * [`--awscli`](#--awscli) * [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--email`](#--email) - * [`--email_on_fail`](#--email_on_fail) - * [`--max_multiqc_email_size`](#--max_multiqc_email_size) - * [`-name`](#-name) - * [`-resume`](#-resume) - * [`-c`](#-c) - * [`--custom_config_version`](#--custom_config_version) - * [`--custom_config_base`](#--custom_config_base) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--monochrome_logs`](#--monochrome_logs) - * [`--multiqc_config`](#--multiqc_config) + * [`--outdir`](#--outdir) + * [`--email`](#--email) + * [`--email_on_fail`](#--email_on_fail) + * [`--max_multiqc_email_size`](#--max_multiqc_email_size) + * [`-name`](#-name) + * [`-resume`](#-resume) + * [`-c`](#-c) + * [`--custom_config_version`](#--custom_config_version) + * [`--custom_config_base`](#--custom_config_base) + * [`--max_memory`](#--max_memory) + * [`--max_time`](#--max_time) + * [`--max_cpus`](#--max_cpus) + * [`--plaintext_email`](#--plaintext_email) + * [`--monochrome_logs`](#--monochrome_logs) + * [`--multiqc_config`](#--multiqc_config) ## Introduction @@ -152,18 +152,18 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles. If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. 
* `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/viralrecon`](http://hub.docker.com/r/nfcore/viralrecon/) + * A generic configuration profile to be used with [Docker](http://docker.com/) + * Pulls software from dockerhub: [`nfcore/viralrecon`](http://hub.docker.com/r/nfcore/viralrecon/) * `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from DockerHub: [`nfcore/viralrecon`](http://hub.docker.com/r/nfcore/viralrecon/) + * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) + * Pulls software from DockerHub: [`nfcore/viralrecon`](http://hub.docker.com/r/nfcore/viralrecon/) * `conda` - * Please only use Conda as a last resort i.e. when it is not possible to run the pipeline with Docker or Singularity. - * A generic configuration profile to be used with [Conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) + * Please only use Conda as a last resort i.e. when it is not possible to run the pipeline with Docker or Singularity. 
+ * A generic configuration profile to be used with [Conda](https://conda.io/docs/) + * Pulls most software from [Bioconda](https://bioconda.github.io/) * `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters + * A profile with a complete configuration for automated testing + * Includes links to test data so needs no other parameters ### `--input` From 8c7ff81c54e00944ac3a1e49feab0a63035e26d6 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 15:30:05 +0100 Subject: [PATCH 064/129] Add mosdepth image to docs --- docs/images/mqc_mosdepth_plot.png | Bin 0 -> 28107 bytes docs/output.md | 2 ++ 2 files changed, 2 insertions(+) create mode 100755 docs/images/mqc_mosdepth_plot.png diff --git a/docs/images/mqc_mosdepth_plot.png b/docs/images/mqc_mosdepth_plot.png new file mode 100755 index 0000000000000000000000000000000000000000..614870b20ce97c68194d16b00cf35ec69519dc85 GIT binary patch literal 28107 zcmbq*c|4T;`u~lzkn~hiC~Xol3L$$WF)B%fLD|<-_FYd&ktN$$!eq&oWN)l3mN51u zODHn-WZ#DQUH9mm^PKa~?~m^v=RB{N<-R}n^|`M1_VG;psxk``2NQxIEGm~TY9I(> z3xd$QY^R5xFg}u5fdA0hYbYxqZyUMB;2&GeFI>5RAUVOzlsk;@?;ZCp>)0d69tZS) zbp2-BE(l_KTjk;fO&5cyb*kM-|FP>T8NFe3cVTw;k55k+5&da=%;;SVLL*EU)Ns-}OZ;Jq9Mf9myddk9k+ zoP&q>CQT+(^u{CPBv(@H%G`gn)eh9Osgk@`?U+(a*X#Ew1oL0(aS*{LQL^X=ws64& z|Ingrp{2gQr%A(+-s^XzmB<VGycV5f7`EVtlH6v=5dk$Orx}<5Vrqz-?t+DAN~H zpXAMUEreM5e%fngv3>=+K7y|}n=t&&X{|*%p7QCxTS>;^w?k(4FQ^-m;@U^fp3aBgh&)?AUA{7*Fgn{>!7UKX;Qyjhz|&Bcl5a)M zrQDlTUDC^Di?P@j8?{ljEn(wzkNbxKZ=$2_^@ydM`T>u{yu2ST@zmu8=WP`)sdxNH z!!yptkKhAqs+KwqWyh^|n-6G~7VWZA4ltaWot9nuE@ZHF{Rg?;dw#?-VdL%3b{Ab@ zLX}<4r048(MJa{-)xcD+!CFAQp_a!~1uOf=)U`|pJHZ@jj@3br-`DQnHuQ>0pbT#_ zr~VA~ADF_*NzQbZJTey18@Ncizk1g$_`brtdSp_A_kv|@hs#o3pZ|1n^4Ce$Il~Rd z<@+`p&E8$hzr+)EIaD|b<5nms68c^;;tkZc2?(^ zQhr%b#+^oAmDINhZ>ZJtB;-u)ShO5SVE2-YUCNqG7F_>(WZJ;B$i?fyeO2FZUe`T$ zGcIo(YIsoFW>=H^U_*Q*v#h(2I8;znoc&zp&GJlye%eY2erT@{Rd4a@%0SJ%U5MsU 
z^g)P;;8)~4^k0t6Y8dK_&IsGv?xQF=@FdK#Bp+%w?mBu4FAiYSs>DPBH#&v+x&aN}nKyp8=?c!fLV z?mRCq)4KY}fCPu3ALi5KieG!V)glX(yFIX!v4(_F&y6-ag`}P;JK3>1?=qo^_4-y{ z8MbM&q7&0$ymm`vuB)^70XFWRhO&YlLfP|yhY~rll#E4xGu4LRg}5)?#p2t3c@@3z zu37tBP5GLz52IPr-6U4=EK0~Gq3y5zzU#}g;xk#Z)#BcBn&K7t}huUIFxyw_iAl0j!r0*TJ9F7=Hd>V#;pvdxQrFlR|p&q`efeire)(H?m8?q z;8BwKUNq}8Pj}PT8-4N7_1uTOG$dWaA^H8tuD1{`PcQd-Q4VX=&<^ zZx4_CiiyMTsUqrX{kwA=)y`Gizp&X6Zy)v;k~Q{~KjCM8rAwauOzB$79v1o%%RadG zJe6~?)^KAKZD3+I7IvIDzqk==H(b#^SUa%pnX}`%=@Di`5BjWnl@`2H{HTNL(pNpZ z{)#oF4cp(rLf&zdfepQA>_(Q?4=O^J6X3`W~|%?jN=lKp;lEQOo8AU@+{08c6@uOf=iz4Ety>Fh5H zmoS|ARG-atj|56xJ?{Xwz>4>>c=d+oo%i$nQ*9n6y~GV8^>gCZ&&<^(3^xrp=_uTY z+RJdZyJtB!JH%7-()k~`FDWI<*Wv=bSC3GQPw92;^By#p>vcM3@Jpg3AxK2TTl=z@ zLHRqj-8LTQ)G0logw#{iC)5G<1U(+Qj~nIjyh7msdj9?WZx~wI@)t6R7rjPluo_zL%z0Tv`iS! z?X5oZR?cx6e*(r+tW*?!`40DcJT#qay|c>wD|}r#Y5v( ze!Vo_*6=s0;r?r0=g$){(O9)%{~wmbWnbGXq5cV@H4!DUzBSS-wVz!$5&kdenEU-& zpgfZTtzeHLez1X?dgcDcWhL?+y)aF8*1{=u(k_@&KUc1u{H*Ky;7;*HmCmpE9*U#N(CMvEM=SFw?7>mXdofg)X(u z_-UMHKWVUHtDU4;r+#Ia$3J1XW;ipD$w|V#DLb_|&P!Aey`7bOQm=r0I!D5x6ZYMU zz8yUF&nmul>`AOsl#-##@VJ4ftNxjRv35Cwvx#ROCN5WdiAbh@!3{QKWT_NZ$n-|7 zRCOCAwExY$u46jX5qINxou9wC*4NSKdb0M-g~XdV^}Icc1#IRgY_FJfb;>x4JW9LU z+mRM3yd^-yxMEalEj8Bj)}x>GT`M}*R~V<4phu5vH@@+FPiS`X_)9$ zT!(X7u@dk$flGrrn!NDm%4+rANVI&1QMjw&+{R?sdjpGBiy@0cKKKA{ottF zO9!OzL1D@4vV_8n4&cZz}Su+d6hUF7Mgk zE-hd25uV^;>WP$D(mK@X6xW?CGm?1R!?=oULQk7;_C_@JrA}n!J;z$y8$o))#-FRa zHM?Ixtj2RCf+E!%D&E8vU4OxCRBQDPS@q6Ql2&H>a*%_2=pL!(k)>?`oVxfaJ!|rNuhmV3I66^hlB#wIB(YDGJXd4mis(fYK|Zx_tXIwryVODUx@sdDG9lXj{K^^`*sb*jf5 zzUHO=jk^43Alx9%(*)C-5b0}|DlsvUYFOR-?6WCGZsU7>)`7GmxVK+b_ls89WIAkZ zV;u;Lms)v{^YE(9&%aWWZS87t!_3RSrkPnwsAY+!7=$Yv1&>|vM4PwA_!2JV2-??H z)FiBw6wTR98%iyWiT|LccvJN6KfI)L1=&wD;a$H_H!BIai$C zS^jN7#U(9p#ZFdcsdHP!!OYpK(|*&k>-F5jO){%WN?QAtboO*t7FA}) zQS!vSXX{-$XTK(t#(5994e}Dzn{}R5Vao z`nIQ@Dtb@YTcJh||2c_GcEmZXg7ST;LPhpRV|QisPx&9~EA@7M_pF9Wgs7{N-3be9 zZ(9UBjcY}CsBL5X6qmJ)vmPJoMW_7O=o1htCV_czTVL9;BKg%rI6$dyfW|I 
z?=POcsWh0szSi@#ZI-oy`sVjb*Wsya3hsRkBh^Pm@N(`GS$ks#CRsTL){PA}o-NHL z7;d~4ed;Q$wS2UEb}_ZeQgx-?V`V;;o6;u!V?rocnk*-P8^;f9l-NZwvmQ{+962sW z3NABTwy`r<7M^Sgr?Raj|8}2G%8PC_4+YS!7)nIs*=`EsGcrstjP<_nhO)jYFt=)N zjQ%wA_&sJDRTqgRM6TDX5{j_zOja>Fy5!yE3ITsud+%wbDo#456r7CtN z@l)zX-tz6~4N0%{ZP4AT&RBLi#*BS=dUsCDx4+|Ll zo)(_ozjS7`r9@nOTH`^3;l_~oz#)n-b@_);wPy`>YGy%`-CLAR(m={}weO+fRjm^D zf@y2-YxRL+O|qjs(!4t!?TR4oD_nnl;vTj)ptrD)&GG*`)%#+t#?KsFLep z^ZZhu;dBTa9dZSw-nNS5R&@pDF74G_Z9?pb#a4K^1*b4RDObAZ>%zy>ij}+P=TEKN zFV>f&Bj;-}==dy58n>YF?UnSfFBTsA60kzIy1EsMpcMqmyN4IcwnKk@45(M~M55 zh1ZcxDH?kU_I<*K+N8{z{fKAhr#}e4_tCYEAo0YG2_E7L)T%R%?B*IEr9ScUK6nX?vr(Nv9mnQWR`Qn0@k$MjLPdop71pzp%Ps*^$~(O4BOR8djq6~;eu zo&8yR8vfB-9f`*qtG&yF*K6qM<>$yzi>PaB-m~5^d1V``jf1bOSC_13THr%{je4gIeXc1>btt)TwJ(? z4YRdO1f)7XWK9pzKIr%{dzEWqQsQUN9^^N{Y_pvf7D?zW3&oKaKQFj&!>4>d^#vDn z$zu8ee2Ghm)2f}Un~;dNm&wPI%JAEJk4-Sj#x5==)Erkga-q|n7WC^%nVH!5c8QV* zS57E^uT;BbkYDyXmAg`8m-YRr?v*BX{?@?iHyg3RZyv$|5_oEH97@7-!-PP`*mhW5 zg-Kz_1&Lkoc6sI(MA4AsoL>RXj+RW^o;mxB@Kpuo_qW5h>-v&X9zGB3+i^p`-a*}|ZZ1LSahECeoJc~z z(l#g+TN4b6t*L22c6LSP#N?u?lZ1cjtdSW$-k_|A(!!2iOW)m$E8M>WUK&@6i|K$u z2_gQJd#adh>M)*@j^4xMRdQ>k0=f0;S8ltoNE5-(&7WR`pB9%i`F5q`UyXA+9vc^6 zt}b4yiI4dT&tq@|-bjdsPrQ{I?V!Z66Zaf`>t|h3(xi)X-!I1kzrKPcw|;x#r@VDr z_KA*e*W#q1j-i@sm7m+&CR^KD(rXH`LWs9=VOKTNn_vWaqE|wxM|QPnLJ>dd8ir~? 
zcM{=zEl#%~$Cy`A(m=+Z99LVL?UXS#wr$3SLoL@Nok~vfVb%Q9g-Is8Z|9VU79cQWeZ*{0=(Rz?F-{Lh#sba>BQ_dts$ zD29~6XqsogcW;1-4+st)%sf30wzA!I0mqVCc zPqiqD)lRS+A1x=BOUMCfSN?biF+b_oFZ)A>vg0Kes=t4)Sy`pb8P+g}=0&gIqQ)Pr z{Az7TFJS0E*D(n`PW`wr-YNXFkgCTA=L`uYndyL?^mC(B?;IX$W;BD-%GC5+ z`~}X-S_e9kXM^{1gNjWMti`1nL6LvKiFOX}mR&XCtHfv>fXkm!BP%%G>A^y0bm3gI zy+5V%1!waZ3L-x*sSal*A`_=n7Hz|o_-cM(!P%kI4SJM_c|Y2Ts;_q?Km{FjI{1EJ z9qzYxV!X_h#Sn_TGt!JK96R=GNBv}Af5`iN^Co5iewL?fGnqWGd?zqRt@u1V?nrZ`e%!gsU~F6x06=;9h6K}^ zNXh*Cc-e4p`K=xU6|STo@VJ{lX~d80F)Q3xNr3wM{pnbeNB8Hax-Gw9-dk+5G{aOy zW0cCo2OlKPU}0!pgzai%By56FRQx)$yYyZ1k9xEWc0=7*tBBjbIv+>tNI~3kXJyup z1bt%ey7n2L{8JcyFOuO*&OM)JD&D#OT=A<{6G!c9@zt32x^-fpzl~|Cq+IU1_lu|d z;fWAbzTmp>@Ddd{{XqS+h&6jom}k!En%AQZ$>s6^ccBAk^ily2$ zx$V&b)tKb5o}a2u_07okXul#HOpYd!%-sClmGB%DyTt=vA9Qev|268e(4HJ;@!({^!C*| z;|I~LyAXbttdZsS)-ym?KDXyo|C1*UwhwqJF-s$2vD_m|#S-#*!>QoZE4I9h^1Ecju&#k4NbD4n_r;E7X`lVxF z%xdRH+I*b!v*_yZA!1GY*=FAK^zJtwS7)bK^oA%`Tn=SB$t@{p+IVhNGn>iCAlx zwF=}1Gi3nxQnLncvk9{zhi<{Rp0%$vb8{CK!&u$CDOEbD zfeu2MAETR%&dbV7mVFv#wK}GOl+r9wbf5qVHB=0HrXDKUeEn>I`es^}b2}Wg?qC0l~F_v8h&lH=xE~Os! z>#AY`XO56TTdr1jVzV)K6szujs5wz5*O_7uXFl0e&}sH)G~sTWx3#gtM4i-t%LjlsjVRm7EO|JL&oFm( zxYuuXc(m9lWZ@-F{vU{+2gAJ4i2H)RS17=HIJ7QeH! 
zNf7;#l96~?A~AjQ5w3KscffRQ^*4tH(r~hY#3O3oB8vYU3(pxi>p6=A%b0GzqWy}K z8rK<$%7f9DUEzTzQTJ9!iClgU;k{OF@K?bJQ+Qnu$>Z%YTNNt+$kKpCEDj5^$B^pk z#-Sr9@mL4FIg^;)fRTxdZi>)?&OF_%0;4D9MoctwJNndM%yP6CnThdM;L3{7vhq23 z)c%#y<8#UYqRvVz-`6Oz8+hjT%92M)g*v`54%2nY)Xx&WM@^Vr_hN2oT4Hr|B}TLe zfZ`qAF@4p~=mN?A>vxSWU3uf_t_XCixTVrI2S*6WH!_0cQ46qecRm+7SO|A&aTYqY zB>cmN0r$7-wiMJx>@FdZ;P>!g5REU;>u^1$BBR}E!DWf*__uHKVEiAJ^D=@aWBt{} z@}jkFlME}1A_SGSz7D3=pRn7FX18G4wPAXmu+RDObbq7`pZ>oe#F#3dJs(q>uA7MX zr}@ane$Kp^?d>||BV)ip=BR;1uUMX6mD!b3Kjk-pcKORN|F8zQE1Icef-eF{{YN#dx%_|{C6Dh8w!=Sbzba0KhMyP8e*`0#DF>?* zcvU6{t-FO63gkTuc>WpHqX+qzjSMbEDeslVT5f1;cUBe8#CV+V5WAk(aYaqd$vI&H z2D(^CBR(CI{|JVcb(jFKh;Yr^?-qbS(CSQk1n#T;jffd+LF{Di`GnU${|ewd5dou| zkm{5W`WhI=jnw|9MX%zZ06uB53p|JVcndY4k=y;*)Si7jD(?$G9vb{V$XT-2j}V88 z#qG$c2hN02n1W9JXzWoHKAp3gFMsd(o6R~PZ5&v}Q767Ez&AQNI)Uc$6HqR4)D?xN z`U?87xMwv`^Bk5DG>Y86OHiMQ`)-4^>&L^z*W#shUhz=;sK2Rix8oGnQ>mgHO+Jx} z6`pgRmgLav8E*rB^vz|JQMa|TLWz%~1!L!Q{3o7=Zhsx=t|aF=YZ4#%dtjrtf|5<0 zsA0!YyhEWfuX)W z=6comt1LymH5EnTChmp8Ki2=G@a)D^*qM&1t*y|a6|=&LADC4*CcHLQvUzzcRQV4- ziChGRLTgzw_m1M*ffl$xH@_zjook3aqK!}( zY{~EUf+X$vPOrWoHlHdkZq(X>d4*2%t@&>O=df8~(P52JY8b^I%2UFL`(k^3eP?v~ z>+c?29i6zSC^aja$*W&*z!<7U2q_Ju5mkY)xd#jV<&ux#A*BRK+oxMNF$XFc>-T*7 z=XQT5$7hOB(k*KC0H!ouL2di^F?jAAptZw~#NNfbdu`15E(y;;ZTG#@6%d?qq(g2y z!yPq;1MZNi#Hd(KE9en7F_6K@3b;5-5WjtN!~8A1i}nXNiVa>60d+pBA;@^RSmzwG z-Ej_>E4nW4t^gjedChzBwCR-od~=HHZAXkJcE@w)^a=h#@~W22*+ zeA6*7pl_lGBLM`P9v$cx_g`bLPY1q#X+hC2AK`hy++cG1yHV75Xgjbcmz_N%+C1t` zM^OSHE+v*IKUZ%y0*wG~hUqD3F5Le$eyZI-oDXfU7J9<$5~ayyG>AwqX1Pp{$j3^J zla%DxW4i0=tSqKcdg|wMMddRdHZ0n*dQ^&|>&@iw4`7ts0*C{|3h4AJR8%l-UJQLA z1lM~gZPr;-YAfPnhBqT?6xv-^7*x2iIlq$OLTccJa20V>kE4~CZGw5ma3M~#;*)e2 z=^j+Z4{lq@F=0J!q4DUEHk257g4Tg@f%yer=q-^ent}S=meIWZq+wyqR)mnABHaYa zg;-HQXOr)KVNhL!@jrgNkl4cY4JLgE5m#Qa52hDkM@lqtUt(lbcKA6bs9s40Y#;2K z8L@k(0H!NW3d6QRA4|i@f5NWjze6L7Rm!Lv*M`pC9)MvU#&${SCfPb*8LryG(j3#= z12}_U81DfjcIM1GJAj$|3XxN&RAA14$PcM_#@7e-=R$*Lmjqu?0ekT9;e%YIq1iCf 
zj@q~MGwwp6(P;Zn2AeScq255v4R~%d$uYze53vR$TyAuK{PVy7i<@_Nk5B0n$e~@s zpdz8y$pYBmf8m#fj*2@T*t$aZB@U}&mdg*kOn&@Oz!-`}!aB7iza&0(NjYJ!^5B4l z5^ysGa%lU^5O2Qo!^d{*&(Ub)1DhaN#>kqpj#* zVr38_CkaKWis1ij^|L?|tl$9xP4Mkk6FdAkGQ6P?y8e^aenu;6z)5DZ`0N?GgvVXO zK;vA1rv&MuY$2+5kIv1D?kY~gtsRV3zi9l&<_HTS|LC%e{X?f;aG3z7vgJ$5F|OfKN=gc3)6e+&P7g@ zNOqoc($af^>4YXf>jj{#3SDkUgB=lsZ6N$A(h*3K$#m{Nr&%M2dr} zbkFNz;A&454 z^BUke>c@eG{j4#;sEbOtq=5bfLeVE1yaZEw4a|wmrAWC|BUB?=nl#yh$RA1$CzAR{ zr@pB`Xd|A{Mo<`){dTKg6PY8zltN*?)x;MOJQPkm!%rUf11vj7$LaZU!u|q$5u!oX zNOokN#A<)--d(~4zz*nI5WQpAq)C1mNLJNhFpzxUx-)>VFrHPv3)9~Ov(nnd*!lu} ziJBK346N1$9Md8DqA;R-3iJh&LSWk=K>+cTQxD^{U=-c;>@_<9nC+PeRDv)lwUl&6`sX z&tHNmdGvpGS(i>mG`8PH?ClATiWB!AmO;y*ZDX`cD?i)^Iw?qIv*)0Z?JhwvJ5S+1}TqruV4T*qyl?W&MCu|vT(gf&V0*~OowyG zi@q*KwbBoNsW^oeJ_LcAO(Sk3bdt5twRUUEZ-pSxDOzT%V6y@Qur=GFY~eI!x%n82 z$E~JehJ{#>fipj?#f~txFmpA6ihQPn$B?7sLra}U8HW)**~Sbiv;VpYrUfSZC808L zv!Mg;Wn-LOH8B1JEQWOV!r>G7hgU z0Xzp>-aU!qq7Q1PN&I7=>df|~c%YmR3W*R0y}L_*!|F7O%L1+zH%svMl)U8Dc^4*% z$aAWS*wc+CmaO6Y;5V0{470DiMNN)!g-JkG58(00_g)6s$u7>ihq?X3@86fB#uLLZ zq68S~N1bwDiRDXF&e^BuYF|&SCaN>k1jUUutFUvl{wV~3Lg#0SumpZktVXPTxh${D zMPv$}KH=ExDnG43v*BXo_zbiL6?#*8F^If?ij;l&%MPOei!|Tlu)F1;t>r-Vu_A## z(C#~<+R>uhzMMX_cOt8|H)dKDVcIdQ5bhftQWFq>R^{_*0 zIV;rTi=P_`*E3p;ZLaLpr1T*>h6cqVeZ*(~jcHO!*Jo2!4feF?j~@l#dPWkcZor1P z7-KG27WDzQ_Rulkf;6{7!D^^-mi)9v^0tGp&UR!vCXRaXb3(#n=nswV? 
zABPZkvgijZ%IBfdH;`A1Odu?!C~Gux{u>k4BXn4etqME;Y%?l7(`wFF4GAzCU_R&*83zK|M6!XSp?7#E4OM2D$XJ zCVQO2tA$P83V_^ySIKHAu;MJJg>JQL=%&30(8gUjZ)U4H`(oge{e|)F{UGn7;@3}h zu+<_!8}@pYr%N`k{HVQSwZiEK3OW#YEG<|(0)@*LCPW{ETakSDDsg zCOE4{7Dy>q{jXnLIFll6#&=Pqc(FC|Wr(P1XB82k&pcFw9n*&EivJ&&USl;>%j=hNmRm8^)d(NJ3 zs%Q^GsLku5{dreEO-9UQ)m+nvFpBMOC7I$6o^2KbmK8#@n5MV=^9~Vv6>{Rf2O$mZ z$K`H4LQW)~S8BWGb-j zS$a*Q_20XabFo;JcSg25k#rJz8?M5KS;9d@@kI$S*IQR=D9Y$ zGAd2%M@Ij268ZE^0|+?gh6-RP-*ypp!bdg0r=;N#xeAH%D>pTc9r#XzpVbwfvp|Zo zT>uYwh7b{N3D*aiIam!?Nbpw&56bKPLm>cO0TcEe9(DrzR0@52Ei_i8?|W!|5`%>{ zunHI-9(@VD+BFi;?nyvV1n4=MKz*}x{EwJ_hY$!XSi$$a5KIOb;pnKGJdNsp%~>6n zl;jWbh!|E^|EfGnnEJ26?n236L|kG3$@Ae8u*KX!!33E}O6nRbAm;4eB442o+?yh= zcxq=xC#h7n1VLVq7}GG_ijwKzQ_LJ_ehZ%E6DTlLfI9-+l`cjFraSoG9UYEhhZ%Zq zktaI*LsAl9idz6X48c2*h24{iY#xAZ(};adlutsH~zMC z2eDzF5g4Tmh~RM`v^`d1073JEa&n|#BtoHb(RUTKJ`qAK!K@#rgz;(bjPrjo>Z(MI zw?dEV-zxDGOfsOQ=uMMMF!Bu`kATBJYbtX5`)(&Ylv>^a+y$GP@Z*pYboiqtg06%6 zEqDONgc2<#>A60k>a%$|jb#C;XgEa4hh4L>H6RewPFc~`2tO}SD zj2O`82X}v9#%djg524&Us$DP$pTTmZkCbBKc>NQJ-9iSI>!|(M#6++nQV{ayOb<;W z;Q+e_dp{{L_bR4W*=K*t*RKQMfvH;q?P~NcSAUVG*`Aj5n$D*>1f61c){GKAX&e{P zCol%m0uK(Yz;Gc-0k>+!!(uo)g8G_4L9oeSYCa5a2p9E0k&`lS1F5R#n1v818i4*2 zs1zgEX3BKqe3r|zFl}IiLgz686XSat0HV=X%u_rx1+HAwiLN!L><4Nu1Ne|HM#~Qd z`xGkr>?uK^MG~?g3W&dz#AAqBTxSuVeQ5Db@rpW~cN`CN;XJ|y$mLEW7D^L++GB9K z$x&R?0o*`%2+tLf^vmSZE_n{*vLQ|BN_}v)1zg-lEJv|?fS!&Hj5sLZosdFNrqe*B z7myCm7ij4^AAXCH=ken-^d9kqcyrrBtu|mHMtBd@$${$<2O0z!%zA(Upwu?v3ePnN z=C?->ldD`m|Q) zlXONTfjOrhv@UeFbwWuPRcm}a&2GUBPZ@=;qb>z70=eiVj(t6;4LKH&W8gWQi?~Y; z)xM<`-c{JX-NI4D7!T6#*w~9wMA;0W(5-t8Aef*K0Oe3FV(8>Um(rFdrYnHwU&(-^ z!{u_J@sJVa{Qyyix)EgC346L$F&Y`RxnJ@8-^g^FFa`~rS4w5z<)(B@U;*9q@h@V;yggmvW>{&K2fm?kli(~0uP`D# zpbKMlSoed|fG3)#<>!*1f=Bob0iLSF(Ohj3lGWfD%cD`jb>{9Au&{shmCh6P?1 zzj^bf`l!#zqJN6=fIA7p$?-%mmhP)ukM}D6&>t%E{9pD|emkS?ht0Kx5S`F~ejVyK zfKX_$W6TWGAz3_1rk@<6nKdf>u!1#6jFyr+yOaupe1Hl~v~h4+G%*`$D4jZv+=KuT znjV4Zo$ffj;BBX|F=!L0*#zf0vIP^j_;kMbuU_uMuwBp*5`xG3w_5C=McYtsiD3E# 
z=W|korbYl(ggYW{wlfg8(YvXIrMRWHrTqi=sf&x4Z5i?L_A@yMhYBF3-6fAr+Ddm3`_QuKBS7_oYuc;{h8fcpoSPjVDZx6+4zbspc~F|rBZI}GyB-?X2#Oo2xa9!Gn@5vAqa68mCOP%vtsvrZg>hj=o*qK!)r&i$(PkZZ3%QDL}Ie#~hl zHJu1|=W_NBL54YXbn3G4^=o%dVrxhEsQGTA^yw)61{X?`W=C`>a$K%~{tJwC?vD$_ z0L};8rI_AGr=Aiyw?j`{DC^s{+p!Rk0N3gk`(>SfY2!YtSYS<-m)DneW{_~ROxLmR z4wfU3@xQeMb_hu0Ol+)3d)sFEZ`zdH6nq**n;K!zBG^2_MW(k{uy58|G8zdv%#fA} zh6#`q5SxH}v_nffA_EVCEgD;%?@ZVF!sdt!4b92RpwmcwbQ~{$Cwt|YE}#=eD+VB zSqY}NV)De@jOQY;F-6Ela6c+B@=T#-#VTm{0X9?&L_NAgMO&w8qgLrrEL%(Sw6FWP z6^jv}CP19M1({qB2nxd7$e~}%2HV+xFgGuPq3HyvN=RbOh{^Lq+z;S1WCOu!DzJBm z#vXejy?iWtmaxAl*;s?cF?1z#h{w z(L`-(GzMwc&4{k=3~Es!hc?WQzhZv(hz3NvTDPYOG&s=Fj3TDqi|k2}AON-si_4|C zV?G4gSZ+yH6wyc0{t@e9kUv)&c?pf2l}*}jX|IDv)2y}x5iFoHDKw0|2f4Q!cA;-t z%2M^D7I}};v(6^Q+vf1Do0$OMrCGoG!h-MmKo)>!@BnoK$@^>8bI(&MeMwbh(Ry9%uf*r1m5%n2Vw zFl2ksjD&o;0}5W~G|rwqtMgYuxE65MCI*Rc|G4L>0^X+}vS};SKvH7W_n8N61+@Fq zStfR1$_T)Re1<5qq)3857DVSaQ>tICwj#Ts|KGZ`e5sd`l44M<#HR#jgXjU<>6`#d$*yIiK)nkILfI7KS zXgdZSwifB`Q!$R0emy5UZne3*%tgg_4Fh6OJ$e zbaiQwl&OGcOxQq@1D(Zo$fbi_IcVz>gK`C$9S<#55ICs3C=Kaaq%*hI(qcEzqkWt} z^Z;d8UJY)kWo__YBc{bKpP}Ry{mV0bDqX< z0Q26|hjt^Pa&Y>~9osVz1mF}DojjWB09iR%p_SmFQ-~eC9&|Ny-no7xnR^4G zG4l36HTWiVPZ-i!Xn@K9-pDra?Z$!i8Q}p#yKMas0G*f{NOl2@TLpj`D*6J%VY%mq zaON=oDXk_TRbO+|#|;EmpVtt#g-vMWZQ-;3#T&7hmlTxTfNpXY#Ely$I`euT(wl;^ z-!Z@b)3y~mY<-r`8k%jm-pmHt)ltTYuqvOVZ&iM>R zWT3WOAcQ*@eY8L>mV}`>-xipJIOe@yv@w${!kU&Fu;c{XEkU@>!_o?E=Cn==(Q`b6 zpNT%r2fv52%#_(5b%=8Fw-bn9)RPghx;5Go4;NRZnToZAi8eN{!q{n^ z4;@xgK|LK~B)!+Ws1KFWpE@(Z=sj=0V@4Im)6X+VzCR?Vcb9CE&T6QF!w;f24WrFD zbsak$*qsFj;P~F0Yi(jA6wri!RAU8A;PM}>1I+lNNLm~`tAE9Z>6?XVf%Tyd2pt~a zqPRd8526>1|C4}p?~{V;(RW|a34Zz#1z+4$Lb5Dry6qnH8E!*3NdL*qxm*AYhvYwm z=MU3Lg>3ZVujVgc8f)9WcLx;}S~#dSyU;XsU0veigF=WP-)UjAuIv~@AN>QU-@6wW zO8oguAJJ{1S=v>HFUi{zX~$OJRb2Ew3}}fYW2!s@#7xoU9-R7g^-|o7F``>TJAs5A zAs9Q)>q-|>1(W#Q_tF2J#FL=M!}{MKVo?Dw>eD{A34lz^e}m!v7l?Z3leq5l1SL6e zkO1A%@3`Ty4Y3za?UZp(a<+HFZlpo)M5R%bfODXM4WLm|K{^OpDP4jQ3!2gno-o5rZRFh}cqHTc4xfKUetL}GU5+#- 
zf_&Q8*ap~XFE}yi;fFG#1U#kY{yuOPfJzI@+p1_{&e0QmH`!L&@2dWRk1vFWwLFk#jLoXwmszZXhXx=Y{GQtkAOa3g%E0;a;d3;la zYxj;8xm(sSH;1BcFN1r56EP^^8d5Ua zh?Su8pKX)R1RIs*j@#--)Kg+%(FZ>hL>}oKgl?ENkOgu2qa6Tw>SytZ1ZfUtl+#`3 zJJN@@`UnF->fa1fxNs8CQ+EIHFhG{g4~4Y{fTyNY2Y)##?Jl_FQRB0735Hx;6p8i% z>#tMyw&X``CZ$24u)CUNi-x?O8Y3}R(YeF8!h~#lfySaz5=kC(x=pmuI<1v9qsLS> zk;;dU=ou!+(1JRU9|fl$?)bK#jw*Cputcvqj)BO}f=Hmj?0+#a-8g8hS5WWhU11uB zy&)pu(|oIlhVh%Xf{YIF5hyqywxZ}C*Sd02$2Q!dg9U^wjwZxHi6-#i;Y?z0J??v<39xsE1ApVb|K|5BE;(N7E7Ux!mYT)sS} zYI~^HX}gcs8JZ4%9-<-stLO=r~2Fo zcq9+z_W~4~gC_rTjSNOYY{}EL!t)fXOq~(;x#uI<@1?xa=1q3pZBb|i| zloJ};ZAVY{`G*-oPIv0Z=GlzRLjW{_h87?%ow~Uo(DjprCqU@j2-SXi8`U0}eLBTx z=N4!&eR)0A!kBUze^EEjiXw)m@SCn+FWet~F|35x9z%tT`VT&DG4dsw$LnZ)iV&}h zfow?iF_^T@U|%W1fbG7+VX?RNYHP0mKr!hZK0TJ{Ae&~I>b=zHEl`5Ibu=R(-D9>^ z2*Q>AtK1uYRID@`p<{ivf>I73dpzMa8E$3L6avx{qI#$7yZO}ko_~Gg_iHP1Wwbib zaJ^xGBGfz4=j^UHIomtnwf7pMg(G!^dg~xK)Rqw4`%I;ril_S^i&Ac7IQEXG>}$`6 zvpsviyNA0tyeyF}4KubV4A^@YUH;}NxXpy1Iy3Y@P^6;X?PI62&8Tf`@I9S^8*@tx z{VW9s$nw)+hl5f_rb?kcv)Fn1o;skyxnc94eQ}oEw>Z8l0djh7xIItTU*E>KU1Pbc zbT1O5hMw<4+mboVjZHedSjX+pOLtG6ugQLg{S_voW>~)OmmvVcKMp*5@E|-#$cyoV z^8Cj~4nriK!WPcrk*%dqsJr5e9B~Af46N8NTJj-)?UKA60G0&w_($sqoMnV%e*{Nj z=zN~wNgfBmo%2I_ZVIm`8MFH+f<&ZJJW|c5G{jDV&e`JN7&ir{(UxcS0ywwJMq@1W z4EfJs1H(3jympr&53?8XNi< zkq?t_n!M;280b?7d%lbo;e3||i=fc1@h!&!rlZY1+ks<942d>Cm9*IjEVNWM3d;Wm zkiQsiM&1K)q)}))7Z3}k+G>=&-e@s!c1L42C3N|ZH=hRJ7C7jv(KA7>`JYonh=uAM zKjNS{L;gcpp&4f2 z)m_D>E(-bvPZ?~HuSW?+pXXc1cgClzFgMdf#kJh0((HA0A5<3TPzSQqq<#;jW5$rRU9seqlrkV=8u{0ndB_&|H)on%fH;2TuZ8`$+Xc8Y4?8JRuJGMs$6euC@EL~iZk6H0$p&-QvCH)Ng`2tI^974A;uFvV+0#3l&%ilVnDnuF>|-PJ$l5&!X@EP>s=;ny zyQwnx!ntx(bn53OoJG~qe+Sf&4~JJ*L0bbi6)N8t$r=TFPboRq917N?=r~N87~;o;zmBPTd|nOvBpZ&w6%?)rTXdh6THF0A%wI;`5wPz+|Q9)tuY9}l(Uk|1gL z>LZ`coxsROnb&Mk01;&)sufWlJ}K`U-rku7R6Ep#ZUhE_#zqwl)%p$E#k)xe62i?f zsS8O-?#*>66M#~Iz=%<3g9rI?qUa7vOJsyxEjtg5QDE-9F&{i|46#2%iaLY?w3{?( z2(NJOH0W=jU_z=~gaD;@$k7c}grn+40`L=SV>%u%Ubyql_P+~nr_eh4KiijPu_+aZ 
zQ|J^lkYOudKLvonIk2HwbG)(G2%>}5o-<00!{_<%M&`HuOlQiiG+p=RexUip37bV@5 zh22obW*jV5EZ#S4M>$Gw&^BnL)4F;=#X46&2C&3&9f1k|mudg~b7+sMgfdst69Ab1 zZav8ay*^MXJQ0Bk&WxIrt4Kyf1k>9-Hf&5iZUba*(c}=g3S|)^wD}PzMEu?EThQo0 z#(%Eb>EN#d$HYhFKtzwE+l5Dp-^ZIy$DepURv?ZqG~%&BIig|e!7?S87HrWfwF+JX z&LkY?M~b4U&p71bIFj}VWCU&zL-MR#sBL`-&n zWB^{PUQ%H6^8BI(=zBTA*rWKk#fI8{74;ghYm_Ez+`?7TFa-goF7{rhp+i#< zn}3Ssg1gIY60+Lha!DDz4$FTz8J3Z}EdVk|pTw=#3f;FiI8Z0^J$9j8>26nw0390V z|5I*FQjk>mB}Zp6^a67c))b@ZBR3qaAHO3!HIoBVg_3HUdwyJ`t>eFi(uE5Ya#w;sXK> z0iYcOk_;SNh`fS@IE#R_g_8}$mX`zo*}`$J#lC_^&~20&%*xtu)bmzId9P&xxdx$(9S#M991hfjbwVo{ zlvRi>F>6=>RU9K}BmlW1ETyNduvIeJn))EXC<`!=h}Pje7~Cz;+p~RqyEEj96oKw| z7|eB@0^Ur2y6u@&8=e+x8+%mvth;n=AQ_<0!( zMaBcGg;``W-$A?tE{)py?E%jE3WEp?OrdT-pJ6%z!Ej>3ashfRx4*pip7I9>oN^FF zxOsM!u>p1Ee*e+%*~==Z1qr&mcJ&kwq8(sG z=hHwG?G6$!!iJ3F$HubJ8^QwZ7yvSc$fl41@*UK-_EdDse~aR=|mQgrWw(@q`03bI&VaV(MJL;R+!GfCa-Yu>}7c z@UG{bmH|M>&z3~I4x})ke|iV|I#_Wud_dJBHj}7GF-eS5qyX$26p3K(=D7r&$d3K> z7-d#J`!T+^@C$x5Z!ab1F!l_lqx{2}BkV(Y^P}~-d!rH;54^c$uu2(P zps+dypZ&`2Z~o+~H-24x$|1kx@%}eI(u|M1&E^j%dK1D|JaIi{;_VN8`wp|VT)Njw zYes42{bp61UrLWBcBokH@-l`p7#>PgwfTZ{>W}-FSVxp8v3}Q=BL6&Z*Z&8pKHr8DlOA^VdW8 z&MgT9FxVCmGM|X%DxR-hd3u+rUza6F4p;Q*`;*PG4`9#aJ?yV%!+7y)U%ZH(^#LT` zX%g##KoSH^#(Ed@**d_0r3-9J~AD6Q!)dE3Y4Hxf0#D+^n!x5^T3}Cgp|1 zHsg8;{7X@*u@zHRg*WSn5B1Nd*oYn8ZQBxR4JP^4>OW6AuovCQs=ZA5fUS3sRA0*N zpN*0YQt-Tmg)oO@d`q6vvQS^9vd7h(9e#nIy@+{{mILQis>>S|7wv|^m|lIpE0S=% zJ$WC|lht;jFOxfnl22a@39N7%P#1*vvrk_ZgU^ z)e>q_pKdY?t!3lxezR;A@_cUd>F1%k^ zt2AdF|BAe`eh;E2*WC0K%dvgK{m$pUd&R^z%iF+NXDnJoXgXy;bFwI}Ey z_u>}s`Wzji&T@`B3*G-$q$c{2y_^LnR}FV;PtF_%&CJ6Log5oRj%OV+DIN;{!pJy< zvz>RdrKZjaHZ|mly1GsjM6+9tH->voMO%&%Bu1&_e>eroRLGkedzjVaT`Q!>*IGK| z&V-jP$EC~|(Ve_}3(rXl}{i$s& zM&K`8*}C9gfDfH8nwn}+P5ww+CKA{Il9ZEOb)mzR`-#`6)SiJ&v6i&T8$V}wTZx3m z(cCQO@2S$6Di28FxKRRsq=!Af6kf)sELee(R8%^vQyn8_`D#r`&gVls?nTZa%I-Y7 zGBpwYmT=EqZ7a6^?nI~3(jfa9X>%hoGV-^>reck522;_j-1=JfAa+eIf2}Bux#q+h zRu(_P-)=n;f@aU_`=(~EoL=Qf$B7 zN%eeJ$(Jm3@wP1#PE}6 
zOv}3qi|&|JBu#ddhz_JCLG+GtnHK*jJcGv7D6vJ%mw8@S=6&Rz-rH@oY!6gvnKjj( z&m|DD<JibLy8xjC8#5RSL0UZ@Z_|b!B$k zZ1x=E(k-vGO`nSn73^h;@URK4VhpKmlNoY95tA2@PQ(Qh= zZr&Dz8Ev3!+UH8TYr%!ts=ylLPySJT1fpa#vKMB&)g&{3K4^LAOgT)YrGj}2N@k#Y zY-#U+UTv8Ke&;xqfK?JK!JnxMva#ZppRL#f2K0zU zPWN+dNnW@Y&bPBnM-?#*XRtZp<5WLWYWTG^zOskwVK8B}^v)-I`bl3jx2DHs4sIpR zxt+e(IZc(M%OG&m=5cOuTmFf zuq~IN6A+J#B?pbE4V5SvvqxLb6yPVKEoo_M4*HkS%85P!x$+;W3lIcqt>x^ir>13& zBo(2ZK9YeA(@g*7KJW4<2u&74k+_mG_3vG2r2l}(^A)Pus39jc*~cJ;GBRl~8!<9kUUv_!zRCKs@FXy8Q%9Vyk3*4z6**Hc;QWuwT=Kl0ei8&|_KqVg9X33b5; zDe~&==o&(9;}r(Hg)SO=SBR%|#ZO9#^kaoWyU1o`o22&2aa_=Wt|h8IEnYqPD(cpr z7%lBtg~;!Q14GT_QlT^{zdfiFDpJK}8d%;f)a>59e{@S?iwzulHa2>%PM3&%Ii#M^ z;y~EzBpngvrJH!`a&?Glz(ETh4;ym)(`d!k_6qM8$%YKB+LO~&_ZDv!#5^{cqJgXj z6lEyK{r4Pp*{AA0&>@U{MTta=xvEQB2!_r-dscZovpHH=rNFN!KHnu!#Qxl+%BXbV z(8SrM25bCsRv$K?lkBQ>WkAL!!VMLyffP~C)zzEdU6CozmNgY&8<=IkJM%hSF zjS%Mj!^A?-Q*N=-`{uwzgtbP@VF!z%C{rcR9b?m{aZ~Q_g!LMlikr72m}#168|vP= zDF5zEfBNn_RV|e-qGj#1wAG0Q%uT$B4!0A%45P~TQGz|%JYFC)qWtW+2k>E@|Biej z0qO(0v9~rIbt1pM=ld|Z5=`@m+?{;?X@Q?Kd^3Xro|^auU|kNQ%eb0nnXbuuP>yOa z%~JgKorq{|yJSz8RU7|4*CVVZl)%J4OC8+Nb~4GO(Xz(&>6CN0_U32c6Gw|H3aQ*2 z`_jZWf)P*sidox|2!&Bo=Q^e!-k>Y~SQMs6v$&|%`FE~=TrP9-Q#P!_)M&hdvEGU* zIMb3Yc}@JwMG@{V+x=a0P+ebH{=KAvu^n(TGFizf`E#P~ z-#$VV!SM3Gvh*(0uDFgiXHt%=PP^wP1+DUME~~NqSKTo7L5JL2x`tG_mfqhT7u5b~ z$~>#Xi5|JipwZX@Zo;h9&VIbTbIG;q&3IQIU9eojS7=c^wq-6)9~3IOFs})0@XyQ z5p{PFfApBoHvEb6{g1X1g%2KTsLIQimC+aFgL>)A$ zPq5FvL6d?Q>nxU5cCdd=&1pR2DR1jYD^!f6eXY&j))7c$}ja^`y9kpDOES zOu#^CLUW0vU+A$Dhlxq$(J%5}&paHwJn$zS-j=@5VR2W12God%Zuy3 zcCFyF1@oCWi;mP&Ndb?S6{3BJT|Qe*Y|I1Wx+AX+9LF|{i)qNI%9x35JGhnoL_B@h zK;q&4K+ocD~GsZQvMh#cQ>51ho;%k{_De-v5RQu4dV8FaI z^%s$dIBNoo9$l_xOd%bC4Hy2@R59(H{cCMDe)>PYGR(WqazFBqoggxsd3uBS%4c>VW(0qI=OlK=n! 
literal 0 HcmV?d00001 diff --git a/docs/output.md b/docs/output.md index ac61fafe..ed843ee6 100644 --- a/docs/output.md +++ b/docs/output.md @@ -210,6 +210,8 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. +![MultiQC - mosdepth coverage plot](images/mqc_mosdepth_plot.png) +
Output files From 62610e7cfa9e2c8a0570c4b41870db27b3b880d8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 15:40:32 +0100 Subject: [PATCH 065/129] Add coverage plots to docs --- docs/images/r_amplicon_barplot.png | Bin 0 -> 29052 bytes docs/images/r_genome_coverage.png | Bin 0 -> 38808 bytes docs/output.md | 4 ++++ 3 files changed, 4 insertions(+) create mode 100755 docs/images/r_amplicon_barplot.png create mode 100755 docs/images/r_genome_coverage.png diff --git a/docs/images/r_amplicon_barplot.png b/docs/images/r_amplicon_barplot.png new file mode 100755 index 0000000000000000000000000000000000000000..7b2bcaf7aaa07d1d1b9e367ab7826a4cfabb3dfb GIT binary patch literal 29052 zcmb?@by$?!+Ww1$qJWJ^42XaTk|L6WNtZ}TODQScp@1meoq|Y6OA9F7Al+RlE6qigcaASDr75CeZjjcE4L#dqcN=J96*n)Q6-2HfB?8tFnWh zr4rMnD9$0H7B|zW(&SePyqXDfom$ z9Vgwwv-Y3>&US>=c!cUUX3+7G%{k~s9KjQor~%ASYRiFE9r?>)8pEZ7j(&k@{35Zw zsG|I({80*V!Y78Q^7%ZcU#>-wScfZ z=xc6oWcOfGtDT5=}vu|rvz^zVgbz;&fq%;Ld zD$R~`+uM_qYbixesP?;yt4xV^4U%lUEHUm*NU>zy+^;T^+aD#I(tqW!(Z8D-Qi{Jn z+S{qx&1H_btu1M(Ga3-iWj%I~v`^%T-w^8{qbpzo#8z%DqRRzZ&r0HfP zVf}1jK;&*rbwjm(a!ml=Bxdxh2S@SEq6thQ{($WA@Uulxq3;JZdvb{-o%3bSA&k&bkxMfyJ{C(IAD1-Z2#HF{ENaqu zMGnRWvz*uLM@cMBEp@G}9Pmv=ERK!o^C*|1jvIA<+p%>qr0F&_%Ty)$#9eG-zF_RQ z#i;bDc(G90hLICW=H$(Yy&F@7i}Gl+CmHRip8SHx=rbaPjhX2pvbtqo**JqLiZMngfk& zhV~Y}uUX>V;-tU5>b*#KVts^BW_FW(e;QFC7xs;&hK$G(wZHv!OI)d>ersE{Bx`w+ zNDj09ZGCG~&M=}=nQQz+{nk96{DZe%G5Q%N9cNzm*U|IgR0zCHySXzg^FypGfPQ#q z^1G>K(r(oz=GeYLSC1dn5CcmWhZ0AiBL|!GLiCrcRG*FTXa$8rgV|f*=K9l%tgNkk z>vhc-7s5@pGC#|2%X~E=1W~ucninKaZ4wO2A1oQD9_XHLM(1bjDK}v-%e)61#VHYb z-N?5tM5SETb$QD_#vk(xpvyjN&UI=G?TDGBnKfQgg98B)rpninyN@&Lh}^BM>uv>XK+bTk;HUoB6=P2j*#unEoBh zGJ^CAr*Rp=xyzj$s|s7TzX*Mr-dPzo6H-MFN?I~vmeTW!`8KPZnoJK^HShODwy!Bg z?aXZ(wdy7WTo+d+KjYx7$llZ0ORKWwR^!-BR$Nz`MU`)wsOMHpPH*>OZ9v^pi0M3I zNlh8orvrQ=9i8s1;baf`n;xBI2VQ2%OE%06reb|oWm6}XNV`#qa=e56LhRRWrZhy0 zj8Zy&E#7Kx!q6A3pR!(DMSr)nTvjlfASCQWHDgpKcs4SathtA`=4}OPji>dPE|zs2 zQzKY)7K>^OW1at0hWId7q{JDH&$maE<>CLipd_Ib$ 
z8beMwoJ{+uD7Q^J*4+GYZ;o#)6v&flsSV$1t?zMHC~=#uhZ#hcq2J>!BZzuVt>fav z?%#Jj3pu%QorD(1W+|XAmY402FH*WDwLb2rX1behAhy~D=$v^HKIpPu{>T?HxmF$i zPDMO2`98PpB@We|%RXH})6}JCrhP3rjF89q$59Gf1NG$%q|@xIj4M?XgwtGuwx&5H z7Xsd1%}rHl-d)UIFJiEgvy_i0U=VTqL4TLVxR2MBzAkUY0yC{5adOR^C1m5fGp>jt z1c?O&2HrLd3=Y1T?7UIrS(oa;-T5?X^NhDhXuUz_{7l%x6vD0eJ1iU9?`p|yhx~<~ zDDk7-m>C{qzf)0iwfJ#UJcp({Kki`^ov_MR%b7NLOhsMG!ffhF&u7H4g+9XG zOV)ID0Go(+N_B>ZX+yZm9TSbGW#fG_q%jFS6mlq&(pqt*y}9b7p_<$bdr8}^tW`b{ z|F==~f&(1aDR1(lAD#5+`0lm8DZ&l4ep}-0M_y&Wa^_}`a9RgH>=W~1}$Ry{GRMH@E zh3K^NWUL{+x#UedeA?UI9weP8*0Es>YO@HZU7E_y`(;S!+?|L(DHKI@6~y6WoK-dc zoQ<~oTkSS7@GB3cD6e|&Yvk7}d-dy|GW6{z=Z!?_-KxA7CnqM@?)E&&uslW15TWAA zOvYqKE2mm$Yhae}tYn=n1j+NoQY&G%FvxZZF*_{7<;EO!Q#efM?T_&`#>VK|@+!xK zR9PQgNSBomPPGrWn+YASQ=ffjje9}>f6c!cf+Wk;9VM-m!z;>Xk70m-$@$>jl>&^LC0xsN#22C9OS1+}qp8~}Cbe>CqSI1(w+{JvX z!ngS#=gD&9F{gXo$gr$wV;17r$B$orDDRvNqkBZ#eA!5t0hOyJqH+-Y=%{03?e09X zZ(ESn5Z|XIbrZq%`YR?&gwzssW#{XNn@u?K)0Enf;g&U~e)x93eM9h|Z5ch;)qFaP zs&+np#KZU$LyC9d6CTQMCu zx=6#IMQ6O>C7*W-iE-w9O*A;jCBlTay*QnSMmKW4He58cys|MINN{zYHbNZ-;y*XL z`u)B)m;RI3)VNhM34|rjy%Ugbt(0N}PL|@$#yoruuQvuHG)x4VM>%yN$Tr5!yctp7 z9-Ka)-219<=k}9TKg6*Ya?kF%x@hg+n_6{_(J9S0&w1Gfz7M^T6VV|IIbD{%%cxC{ ze6iiK+2VaEeIhl3Mp{nC-}@*S3Ni^;Ll9q^=*@P5(F5ztDf1aAwd!r-m)`3f<=<0( zL5$KTb~SJHbVZNw-mBP)$w*F}PLIq8dfTk|h3UK{XfKkvo@RMx%*iybA+KEGwa=55 zy*>rTrqdM27X}H6L-VTFFUJ~OeARxVCr^EKjQ@@=;usW*AX+>6R9QUxEt8nvO4wd% zlZ2jF^@qUaus*!eOVX|u+7)@LA-@V-vvPJiRk;Z9i7lXV+m?gN6~wiKN4|)ha7}2?yx4V*?eeNKz4aY)Z_?ihbQ_nm7IYE7KCBk!6K;E5E)4{@;))LCX`Jn`zO zFm8Ew=4WK>5YK0YDxCr@Ws5!mJIxc2lY6-_i|JfJOq-TtqpRDVh}*U>lm8~px6vyM zR3#ps;KpRVcRk~n=_71S#`rB!6yORMoa$NIifeCtcGhN?NSaGsx7pv6qZL(5?KYBE zes!MRW473m) z$Jxe$U9QR2^4u22a|4}eY&)c^eMCaENBOxEb-m3*-xd+DvJ%eC*Rju4tCLwmnb4|p z*W;qerh`Y%arz$P{IT7300*#Z%!1_4-RD}YuE19(``mvD3Jmo05UTM^Gar!qx*xSN zZ8+Y()1cZrT6SG%^MXd(#v|{m-_+&rZly$mzLU4=Y~JkCJS;4nfV#h+kny^tDaQy0 z`T{R3HHTKSlpE|HnJG-nGQ2f?p4yyqJPDDy#%%~L-FUFSPjPzZUVgDIp*GFuzBvvm z$`3vsuYAEGD<_BUo|KvM8(`xTW^@_%4egakeh$=0$jJn6j>4I9HW7Xa3r8W1 
z+E{tCs^Yl_%1@aD;VgdfhBl{sJU8LJ!TcDl?TgMxd)MR}pY9BnSOv{-ivX45P^_!9 ze43Y8L&UyE<&iI5M+~nX@4q9eLRxzQt|XaT=#i(>pOaS`nm+mY`=BZKZ7t$s#uT~` znoP)0zfutkPH-#GRe;0vCvou~x#DVtbPLwtK!K9-xS%*f-0B55s7$ctUsT_BSdNYz z-OnhD1eY{c2(=m}8}f58JW$3Bt0jCaM$(H0^vbQb=-6C8L+czzKY6D-sc&NOYW;8P zJdXDAI{+l@prp?m>UIh03*4Vday>q-LCzp16xdXEGwQx?I+IBUwBYigQv@I#Hoy2O z(N71(5fPEmWm@l-I1q-7cUtP6**)eu3Rfj_Uo{GWg^syVNyCcU8!oPUxL$Zq!=^RU zN(81z-6dDLb*5(cnykZ$yR|KHs{+V5}Z%XF>=)T!VQf=_r+{ z;=%LoWfZuzpYC0--ruRh5f*R>ilt7Pi%m1>5KrG1@ZpRmP4o`6R9jvv@p&#T3uT@) z>FCntpAW{%F{t?CF1@cp-|nqCms{Mwf>|!Yq1%Cr-h~BDr2Ohmbct8LZT-%`8OZ60 z@GQl5S%+66M7r(glN#Vk1zoc=?MQWxYZnKx4F{2aH78{*b`dx%p+<88HoXPr{$3lK7v!G*aG4)+oqx_xh+2 z4&2A&ZuvY>VOJONAuQ=`CB9ywYUU09DFxXq(#q7PX{vxW2D2L|9NF8tKuftRCPI< z58%X#_3rR|Hg-%GmryD!&K@;cdkKop-7Aj*(uQCVb7ES3e?6cnQ7A%?$#{RBCKq7I zs*Bzpz4cOidzQ&OvIYJbrQxE=WSD;5C#XkbbTae42AM)i9;FrMsikRIa(Z-VI=MJjw2R zG$tKvV=CbbftTJ}%j0}I66)63cjjG$lzN^)!OM&K{mjDjkRP2)$tVuW2@IrLaipCOnEj{aDCY8 zUDZ(dp`NfkwJebiij}lHE^&k#d&Vx4(TY)cvElaU8A_y}&DU!y`jFZ&;?o>O+THb) zo3)_%f%c5s;uyopl#SB30C%@N=-Y{)M)qP^nmhq-&=s=1ZRL0L#yiHfFK*rgMQi3C zz!-o^P2I*sG3Pi`jzIhm>Iy>58m}nT3~tqU7B=Li-bM*glDEfjNJ~p#7D8|hd9u~;$_t_H21!R`?R|$#VLADP z;e%P1mwFq!_^GGGeM7)lSBb!dYCe9TR%QZaSfQ5CTe!Cz^W`qmKDZ3iQAjz|Mh7UkNH5_af5iLh{xaa@QX_cp7?mA;m#Et<&Vf-vX)o1y|?kB zzGF}(#FbN_V*9MbQBMz?KEg*DnlRXCs@R<@{LV6$KRNwTG>MLp_fZNRw?P{gFbdEtUSiqvnvX(b>Z#k)oz^k&P#ZYMjd@S?{APP0f`$dl*g8@QA zm%Phcd4S8(d`^_l4fK}sFsPL=DbP3FFD~8Lv$5hTN1^uM-Ou9$(B*GDpZ^|ktYsd7 zdaNCmH$rhmq|cLdnF)YbXBb8O1pquHhvf)D4sFL7z6d40mVIgCL2(HB7}#;|O&WS6 zrn-iq|0>CZZjq#Im!6u@8iNbyVxa%Bh9Yi*F;$H2%sbM-R4Pu)n@QHtXcnB!g|pgvp~4BJKiv2xXRbOQlswf;N)&+*o4$FIvld^(L92B6x zeIbOr%UgvL@&mvHHp$l#s+da;+BZ#tA!*F#sMArY-1DdfY$e_KPW(f?NtFO)u-fq{jX%~i=Lcw(R)%F%nm{PIN0$h;%iZXv{>iMmXQLNZJodeU%;KRM%|htwr29YK{jo3QkWkdCXMFydX*&yRCQ4j^yTI*oTQKTQ^3p4P+#G zAieaePtz`rQO)KLTG}8l(JQu$+?+WRNxJBwyZN6{T2vg5H@MTinpcF}@%`-|-ALjN z^^@<@sWh5%IMsWlbFW}Cmo$bm>)v2OJZNh@9ZhU47JQ7_6v!I|PqWxEwC8&c(k)N6 zcr!(%vOixWhGM}Y?&Cw=U=E&|Uu4O49w39TDeE>ReCYP-X!&I@WVzAq; 
zk3-^;(G>e&0}OAkVsV(ajVQ)R9elP|=zBDw&7*P+iX~$m2sHCQ9QOJacG(AonB=ZU z10vXWHZdUp3&rl3bjY`pK@5-K(krDQW(mDG{-0Z3>6)`i)V|Kg>#kNJwqR3!!xuep z@I8*@9V9X+Vi*m4qTemy;$X*5jh*k%fnSy#3B=Fhn(GWe;j*1VsTV0bbVFd!?fG@& zjo~T3AEog7DWC>(WhKvNv3wlp1>CFNMf|vT(ff$uvz&>l*ZP3shWL?;G!NuXbQxnb z%kJ747LbDf^>DD)BinLzrnL|q?u-w^E}FE6mJhm|9@udpX2xNxHLE7Lq zYm0=fpN{_g8sQPAg!-TTG#{dJhPA@2e#-g7r1nvBkX z<_~f+?wwLN=(eb9pi#}sMG$!1nj>W8Ae@_df7GFyBu0FaRzsPI=?z@g0jt)-^Lc+( zJQ&g7PC!+~;U=*RK{w!X9~^Hnduin|Cb4ktEuxtpjG|Ik`ux|4e}$CeCEf;=-4W+Ce5{U`NWyU3 z7X}QZ=BgV#i+d)Pa>`RgJ`yL#AMoG{b`t@0BvloF*af}rT|5JK~=-=8J4zzqT5x}f2yGQNxMOU-x}z|dkLqIIinx(;Sq zM4Kx5vOE;0(+#7NF)UDPv{VSe<0*9+iF5@HaWa;rZ)%Mdc#zqD?qhs}a|uKz$ZH$h zNI*4s#cV)Cdk7d_bp%-2Z^iBDo9ysxNmQA01CaUT3GOLCO2qkiM&`5MsqHk2+x*=9 zZD>m2VcOiH=fib%+t9)K*->*EE@skKsLJm_qkg1ZF*-> zq$a*<__cAiExCw3urIZtC zN@K&>g7t`21a9kXT8jraRfI=O1+a-1ya{}$&Y+aq5bAc11(K9zWuzg?`FE))@a$B> zev+6dFWmCg^w4r~~C+1K1|%52vsB+lLT z!!id3mF2!UQ9d3D5ss~$zbF-tIi^caPh@xh2#1(mmImS7=1XCBn zOtro$>4JTL(cyob8=Q$IW1u{^FmbpwASc<8+MRnmv^X!Q2sZyS_n5Wdh{NIbi}3ij zfz2;&5uxv=JY;nh2rKpS2E^v&(RQZS_MN1NSDs5p#rG5veI!vkR4{2O*tk88NKgeHj^ zs)l#O`T}EO${Z3MJ9Vr}GbW~tyw`SkQnx+L0AYI`baC}P00r;`>S2zaKAmPQ)-Q6h z?RBwPde^q%!Xwm0P}0CkpjmZpR*7rKpO{W`lXJAYGxFo8M|lTR!liEa8kmGzJOYiM zk)^fAGG5xL=vcPG2|F=mUdu_lG(Xcd55a2>CSWCE3`VEtTTVdL$)Ck{)xf@ek%Nvk zEn~@v%D&clZjIoKL+RF!i(&=Oc{_uE#AT6Z;{)qfD7DEFt zTxOw1wxaLF4ZM9hd+PKSq21@0Lg3fJ5~l!pcLx@F+}Wj>n9`GWZZL5f4)BHfvg1&V z+6&$`?)sn8X7IS-1qX@yzA980+w*M=msT=xoZ=;O$x&olf6z>hjVlw(l!sIkOv?#0 z6OM77a;kXIK7HLn5_cykBGbEA=C^{;-FQ*R-{)~db%$3LBd$hQ^ZzUt z6&wsx7bPm(hoCC3>Oj|^?bb#`Xr2OHCX?d?&{pJs_5-Wre9V3}10_uJjxj>a;#%fy zxD7Cuvg!=D6o0K#D|^qO*w-+h5-e|};t8&?$*JeoCGGyVf0^G8+wmHy`B-RQ=jpCl zA)O!eW>vaiYW~K@dNyzRfWnq@Hny2|qxzHd+BTW>IWV4cx{g1uo>J%CUR(Lir<@{> z0{}P&$B8|G?ZyF?a6V*L%;(;#|G};Zhyg$cq)0WdyVqzidi2lP4q=B$2PU;I`0RU> z4)hIHa$HF_Vu;d15wZt;cQ*=%UG072Pe5G%86^X7v?_9+CxOihJ6C>Vox^+(*Hm!y z{f?$c_AxJ-|KhVPPz6L?z-fgk)|5OB*=HdlUt}UX4oQltlWEc;xAmTl&GilhT)-_7 
z5dhzoi8w*x$EWh^d4Cno=r(dRFaMHjv;Vg9IBl&uye4#xLODk0=4($jCPM0CI!d;P!bz;aEui9O&3Oz$EmdZ{lAzyFA|vbDHf!K8Xs0PWJWAx%$lAD0xH5&R-mN+XpT@h3MG#e=tCs5f@}=>m=;h<&?_^n6qr-;oYKfS)q(R0f^EsiW~?!MWB8dl6Qq2$i&$H z4JSmpVT;`I8TxxpD1OSVj?NXwFWwc==|`R+$OFU-$_gd9OboC#_)=1AFL;bULN&A|_*gpT~CgeLkvV}vuHXfQpWNNijzq@LZsJ7K1CV>3%aXT(^T2DGo`GpyX|mG%bORjcIT9K?C)uNcFcRTpv! z2HU;2o-jBqwD?85(S5_xJM0Av0^7W&P%G}00YE}0zUa@0<@tI5u**LMBrL%UKLk<0 zkJ5LJ_)s6tF9!e^cZSO^t<{i95LAROLTWhhB)8eLI$d?xI|iTp5*0G%<|sA_dTe05 z{0m)iv()gyl4FPpCTID%4n_e?+C$}c2*;m8+*&($*Ew{+D=h>&Tu2BrQTz_e2Sxu0 zS+K|SjWQl8K)?Mqn!kvRLo#UhRkfT>+MP`it+v5=QD5(J#a8Q4!K(w_GKmJse~aHItRTH_|jsL4V@o^>6(pB&Dm`R%3oB$ z_Xim|sJb4jKO~(SKi`J;mCyHgnSXgy$V&E?kgC#hEy|p#^!QPAfGb!$;zTB;;JU63 zxAOPs5zm4UZEmec&&h%Q`A^zEagpqti9fsDOLxzf0aXKsXAO?7ME^GiR%}|`9K)c( z;2C~q(C7Hn0vGB6V7ev$XTxardUgMPj4ki~XJOHIhyMN(u5tPp;Y)9NR~Hfw>sz$~ z?Ol6(K%aRwA-Bg{Y9g8+0qH*!aNfWq6Gr=%JMV*EBu1 z3iKS<1yJ!ckHQ}73eqC8>{W7s85jIRXbp{DvHmAh!t`2Zx;)vX*5pzDvxKN){Mmancz^ zvI~I80G(kzCQe4k=;opH5znXiv3oARj7z=mS&2i%?3)V{)Bs7X)Zl!hOnMfyo_L7< z%q8nDALy0;fuugcNb0`=?KSjBsKR)&k;t&Jd05OkGo`1a)!?Bx*yj_ha|W<`*&&Dy z7oKxD4vG#c^)BIKZPE$hN1z*t2?XaGCvnC8@Vu zS^rAn7b_mv+~D7)@az^aH-yAHfJQRuwwO~lxi)~j)Xj7Pl1zC?_I0()-FdIc`%1bX z?o}|)G^Z{iO8!JOp&49YcKUZ6_3QkeT4uMtRkMVk=sfqlGGZ7l!PVvToirEqR7zJ^ zoqu9Rg{)2rjBoF0k2+OI(_L4lp-O!qGpxJzcImD;O84k^zs91Ux3Z5bDZAxl$cIK3 z8QXpB>krqB&#B-*lEShd7K@Bpoq{nBLjaJ+{2gqmizezuG+#yB*@=~_tl+IY2$BBw zg<~4OSg|LMTT`iJYNqGU;|h#ueym$(N#`RUrwwfGMxKhpKVq+W^hv}$V4djeVBh)P zz>W;MjM?g5%O4!f8Fo4-Sv1_`NuZPMGA{e@uKE^uAdZ9lIt9d;#%g;FD$a0Gua(*)hmiQGld&*?NGiV zIPBS)<}-`;TE0r&G4AM)lPg)NcAqhxwY0y?Giq5uhaU=H%uh4VUD6<}1WP}^pd^Bn z*pP06hy&l0qdHvniF4?Uf&wjfA8>RR4e0&sZHF2@31jO-d)*F-QCcs-d)s6w0cA!tV>ZJe6w1#W)oc zppTyNV%f47x!a8tw-(sz+Gr8#sV6kN#kKA+zj_tx-*e%TchsDVEJ$-J?Z0}V_}X~< zwJ7m?j0fQXF_LPBK5rVqvu@Mn_)(4oa4o<|7(2-P1sFd)CEX$y7xyYP?2E0}?eB-H z-80Ftz6=D=p&$L213gCC5~XvFq)^`lnFc|fhLhAO6}=S( zhF#QO{Quc{G8jv6r^x2V^7!|CtumFF`QAOBqlo#%YmJ3!A(FUszquWbG-tUMhZs&r zC9{`HnGTB~-k9ltX2F 
zId8${|FTY%>wBQ6ZaYIGuWCd4?t{^%c2-TG+fdo!VBz&^AFr6(T4+_=GY%V{!WTqh zVpz4BW!Lmi=e0-$oHPEZz$C1s?>`{t&AcJHfB%_rnpG&6VF8(Q_exMz0*Aj}CnfFH zJ=39D!A_o?Psm*is*Xri_4a<4r1^^%GZ*ZO*`?F;gW^l&OPBQiFpcg9={pWGEW5 zijik;kIV-3gLGGjRhHB-*d6(W&e|b!YUFAYm1B2wz-A#M)8aO2fn`h3wp;xM@`mVN z9I6Brt37J*5u3R9su_yb{bN2`{zpIXGVK@E`FMmi0A5!bGuR|1-ZGqBnXf6q854nR zIk-#fnk(v?d%T?a^ERg^NDPbXhiq7pF1ak zU#fjlpW{E5>+0#MkTY7v`CnP|CY;>j+X$~PDv?JWeD88Z+w+h-f`Q`;>HB0~KVU+h zMP)YFZ#H8b*1oR_J>cOQTC|%xmfO^2JdOBr^3wv^==+n==VM{pZ!Waa0>DM->5&o5 zySik6(68UFaV&bshuHnbq-ahx?6{PS2@rqzkV~84^SF2iSz~Ciez9;s^u;mCMH+Ha z|K)rLl*19(xH%F}5AK0Z+ldaBtxa^c9xxna>z4dYTi7DP}3I#;ZPpiR| zLxRzIp`c`L|FwpSQr5pu6ubk+WC0U<9&{g&L{TR%}&62JZ!{ED567M4u`sBMMj&}|EB z{QqQL$RmfRe}noV z(+)~NqwpvAG;fO)$CIUpEeBI^y-4i3hZ?4bBNQAVt^ z^gcg>2FI`-@Lvy{(Pq>QzaLh}ec?j+25W(e4q`N8sp3?C*`XAPWEWa$(h=W{~*8c55g5tY%=;!rG#^ta;A=hl#~?mWM@Kr2Cs2 zPX>w&NHy=k3^u^EIM(9hI#!%h?gEMtFPl6(hr{+u>9tIY`LV9TR7IUtPM|~ii{nCj zaoKpN3fLtDCKblW-+bvmniutQ8EF+NLK17Y8}~b>WY`X)B+m}Aj`t@2FO5>&K#qU) zY;g2#CPCN2F-TbWf67(Y3WSdz5hhA7T?=`%K6dkqhpcfp`?*s5OIxB+t79b(LaKKW z;tqy#s$MnP`%d{9douw6^^a*hc|)Qi6-=J8J19pgcCBaok;Lzkbw8fozFu{ruJqbO zL6I}DX^4TGm3NP>o}|(!-OD4FtGf1H}6lg69G(A_8f(5 zKkBlz;np5fsTV31Z`OAr=#)s{@ zn{LUM%O6N+x&JxK^zZotz5KuC558Ow7HB=3^<}Igv#+IXf7xb*pyNR50mF^?>X}16 zn1}xmihld?i;?@2HIOt)m=eW`<%QmpvUeHcLJh#sbs5;5ipc;S4AY56$F%?I3;dZj zP>WQQ4fq{4;Eua-{{>*&fSETara?0Vn0lbtI4oPW>^@p4)H})E2AfU5{5`eioKQm# zgJUx08$TWSz+gF0kd-+V-T;$P(O_JEEqn@f71r$@IxJ5XEKk9}h~*!wl&ogCO zUVb25ff(&JMD9DE?K;(+V0TLof}Dn)yY>p|viz%*Ldt$QT=qL=Nx0z{50ZK6z%)lfb_g0$jwmrVpJ8 zuOL~ge`1Lo*D_aI!Y@3r0Ja@xcNW6n#OuOwRW8hYmF*C~4@R`%n1}|sEUk~g z^Dd)?7LhPXX{9H-3xg9tr;lGBWhStJM7<}a1Y!$#4fAaBA?wMYxxg>0ygR2<+LdwGKPG}) zC1_vf66bwb>wO5R#ncxiNCX|abfqj87KZnp{PafH&76T^Avnk=z)Q9Gu~%Hd4@Mpg zPXDoMZlTr6edFI}>1`0zZp6{c3LFm&S-eh-03f#p2=UgdXf>3Nz&fl}udc0iHyoz2 zfgSD+;T-g-{ZMFGX&Q}D0_G^#zjudFuRiX(-cLC4))#Rd&dyu|Q_z1 z)6=lq>)-!ZwF~e|gui7Jm~$ojhxfr(`{_>>a~8|Tx~I_4XuMDM(GQxl1;=7xgOh8;p}@nv<=hhP2{$lF;@=+E7-=W 
zMIhh>`kw`66l~&|d3FCT*Z+U_T2b`Mxg3!$wAi#cpyDCaYE;fs{wO4Yx8m!)atORA z(%-AVc^N~ptZ;2lSYlKCa%6)DMB zzt~H?keJpv$MKo*?mcX;1fm{)XW)9ue^B3H{55)5+n}-FLx1QTYqWAf$%>TFbnB%E z#Y~qhbWHulKfBfEcew<48S}{D+<-xS>9o52!tqW4dTEj=O?=p-2xmZf(z3^3{gjno zOP{=Jsepei^7AJ_cGycGM0P#2L27tNQ!xo^ZhdwAZ5KsJ2&@4C2WcHQz38d|a4C_7 zr?3kyQNx@#3p*iw?;;$V+(4dy`q*aMzSn}Nj+b5$sG*1Mk|w7D;@{4VKg3-gQhB_f6|0$ab<$P%J`9CEW_-bpaxfXMaJgu)> zJPNTN`iu?8wtB+&c8s(RBtPP*hW!pQIN5Io$J5A1IA-o)j#n{6UznjOAvg>Vt2s#8 z2*DooK3ph|_|RpA_3No1rzwEwmS7L5MbyK01X3bk++djo2Xq|ysgvx<|6>OPI>}?( zi6%Q`3y=f{&f(B&2uvRc9-WaS2ELN*h9|#VdDxlGAqm&=v4ypl*+t-U!bYXcN9Pni z$|ZzcKZGq#_?ip@GvUhg7y7B-^k$ZC{9<2!+15^C2oN`6+4@SHf8k*a34kd$D)mdb zhU33~7nuZM7YMQlMNG^R=v@dAxZ>$fz|RJ9b0F$JpMMf zANefnmCRptYFKB->eR@YiK`de3oQ0xj$&H>f?nXP5L|Z-Mmp>t+G>Da=%HeFW8)lt3^RZ#v$j$ev4-B=RG32v^wVyu z4MqvXzyIR*6kBt4L3O{OUh3e(eC;pJ0BpdB9nD8PW$`Ddj~9FqE}Qtt6(ldK&=de_ zCB2A|P`&xccC@%QD_O#JO;uI1n;?F(5NpR+Stf?`lAB- ze!2o{9Yg_zV}hrLOEp)pw(~u(+^P0Q375&A2q)m)0r81Bs*0fqy*f?<0sRD0bA-De z*hEQ~M6*`4vbW6?z`O&C`+S{gUnU~+sO~%g2|-pCLOr|hEDt78(R_=cuTMZp@EZ7U z$2F*Sp#G$U`>W940!3<+E%#D6eInjM<6DE*$>}__d`bnvwgn(&&}fv}8P5KwGh`>e zNL0-39=i@hH{+Pki>~>#$N^3AuJ@J`f;dr;CgZ1!w+CKv!ku?IBJ8grBrofdqF?Ew z@bm2dDt`OjQm{-ER0?m#yjnFa-qq!~GlzR}u9Nz|rjA%Mz=;OSxmztLG|VHZ1;h5+kqsv|tVU5S9%I7aeQx-O248y(#L<+@#O=Z5vmV(1$UD`VcJ@zPMaIDRc&z5{P`Tm9c4C?X(=4s-UKl3*K>O!3Zgv}Yw@GA`&{dLx^ zuyj6hEy~%L1yT}-r06=dp*nvH?Ub+WCzj5htGG3p-dS6Xp(I?{lSib#7`@T z0(NQlPjocJ{rM~q?0hpgjay--T=4c!mPx2P0u2s8*oB#kvXpBw%-jo92U0-Ggty5Q z5K)!?%`$n{xUx_p`!IN_A^>z)I8X~G@issAZRI-r8LoNz!_ABs8VNqM0Z-@H2QS+< z{x(WtsWM=cq%?etD|Ht{Uz+@34;vu)p#6eS%!lTwxdM#*ikACZWU6r~F`UE1o(Yj< zK47`cB!ssz_1jt)1w?6nTWawzYTZyGXw zhn@3r+lzp71X@l48%}H#%YeICG#o_|Z9?|^42jfD!$yBD5~z>3Gfi;%r@N5!?(cZ% ztHZM&zC5`2k0B&5g<+$nTBh0I4P3xv@e1Efsb)X^(rGz%`0xOJPHJ#S0lmP&+QWe& znlNzu%p_nrPMCJA8pPEcu7RJc1iJ?KqBQ#%%cTy}AdvRW*I(d}6o{3jRwUjs==mKR zg{R#-G4VmtUnf#vPq)F^hHM>tTh2tcYcVuzTECBZC;1l3?}nN;sFvlDf$T1DMj)GR z@JI$?Ng=bdFpG;Ng%;)z7jvw{VG-&7E-`f`tU;QtC7I%x3z;D!bIJwdohWX`zlbCEdXU&tXvm1ms 
zj{*FBIIj+>3`T+zDSRtn-vbDt?+RKjO+7Oa?>|RE2T&FucB4tjDMUI>9(re-I{jFd znsN_IHGDXc2BpAYMh!kzKZ98#@7>Pq=AQ}D|9DJLBjKv<$3i(bVpoRIM;Rlbl`{fY zuYRM#&ZhlEQogMqx*aj!-yKY0r(vMolVo*Y(TTvlZ$?3PJ6GCc6J~1Znn>ECUlj_l7lrqwbcK zf55Czg6$0zae&iT?m|7jvnLEqYVj)b>JGz}5fuCNe*_NHI8ZWxjQR(o%FeyyKe4|w zC@qt3Am#RNCapU8;Qv8rs1$f=pD9=KI2ijEffHY%NDm@}!K%mZZ~fKTF>!X8PrT0K z=Mzm7*|}|Ks9knUkh&M7eSF@{XHDOFY12{Q)OH*-fYv%!Qh>McK_*gL%*E(I6|Z#Q zSF5Ggk68z90WtlZa^qPF$dQE}zTJt0Y`;?)En2iX!K5-?szlSl>?QgW)kzLDxj#7h zgtG2UWt`lp(;jL>#|d95WzE`h)GnLLyZ*8#*|XCA!TAqI8c+B9%npqPw|^e;2k9Z= z212e4o4bMi%woLiqX(~jHi{iC{lC)AJF3ZSYvVzNVH6Q{6r>pyfl*2jL?o0*QJR2_ zqJ&~WlqxmUL z_5kMR)&lQOUOJ2YO!u>)RoLSiZyv|5idWnZzzg;_@gvPRg8%;(Kk|2(T;^(6)B*^> zz&!7xvi(%A0f1 z$u<^(ue9+zNAOn)hoyZzPMc{?yl~@vZ&x<;h}0qp~7eiwgel%N>0!N^=U zbqAHg3f{J^pnOkyX*5ZZSSg(vG3YTj2=`%b@c@>$`+0v$maeE+Ij#Jq-!~JUDWR2k z@J~V6d=QlVmo44U8hy{Tzw~Fe`D@`FHztY6`hcIOI#O;DoHLeie8E^6p=5MjvoF}i zCmduWDr3}>X1zfu7ETgTvs@|2o|JNR_x`6I-}VDO6(-grx9!wz(*TNzi3NJhI3&&N z^MQLB@ReJb-gxV!BcYfR@^Cf^u!@4S$LKfyNYwah;?KtK-!Soy8`)RENrw0S*2YhJ z6QBA(=V|urBVf#~uzAsA%=7@taf#*WVdEEfL1O_|U=x`4l)Ufe2DFPq0Rq_rR>W(k zjvMN6*k&bp$E31_`>h++cZ=ffc50reP2;P;7k`S6`EO1}YjiIgXn6|+WZG$xB{yHf zL%chS63Ew6@sy=RI2Lmg6w~|u(Nx_+^?d|97`mE0fAuilCln}qk(eGv7mEAgo9nM> zcP7Lm}oWF@U$|QB!iw z7+5zJCf6Z-33W9e2u3Y03_cx*#9)eZu)2bZ)%Fj7Z=RUs>}+s%1FozZ&a)RTkn*bf}J{<1wYy*dNci{GmqCSH7 z#&WWWw1Ys70>(^sbG2u491vd`HyeQ8e12{rllDgHxE_&<`Cn1*Qd{a85} zO!@`?JS1toVcZnWiG>AilhS{EYJ(Uk)v_&PYWgyej>QnhjJ`*ag-tfe8&>WO*x22! 
z>l?OcW+VtCrM=e)%xVb_fy89ks;t^>w*6XHlyRGFzdu=`&9L(`RkI;s+VFtfPyP3W zg9-mw`Q>y#lj9qSPtVH zho1m6!elInd?w}ksw?pbZOX8a9ruD3h%y9{X2YutQ)-9bmK{BX$;gD-czFwXHrN#hcDS>D!65@de1skV)!O| zKGw9GeCe~y3%C(WOc>bmYnP7s7ed|yW@{ZoPtW+_`aOPKnNLL+;@XRxMjy zZ1}1{1)gD;;oE{@Ymn(YAE~;Z!)tZv3rzj2@r8N!nRK(_fddzPnpI~K9noAq#nMj5 z@ka84?XyBndgi&3SE|EO>TDpEm3JaiM}S4{ zL|r{sAU0s0EBmVlg!P5qp4ZnfkNs%}tl(dU2KvVrXc9W!I*U=%Qf37VD(S0hf8%kp z(h_{-fquOGO|kvLxBlrjF5HihlSf%4raXCdh!#S91oo7n$eq~tAke!RrV+pf#<*0R z&FWyHSmKduVf&eK921K$oPO1;tqG{T|Ktw274LYAp#<%GN>YVLDMui=_a_3x>o3{~ zRk#4po7x|bM0)5h)us$trOr^@2NjRl*spdp`j7dE)#twf)VBoi>(7M+O}p@NfM=eS z+2X;b0b6dTFNay$ya_YiX&5{2)P=%2K&7UN9FdJN{Ml12Y1wJB>&|f8&NVTG)d#Eg z%A-q*L+f7{ab}B?M{zzGqfbd%n4>({+ho)Q{)%?d+is6%L~F}|&EpY!z+svM1W+eR z?$!3YPKO3ve)4B8nvQiqGu3y?_1i6^{X%OxL2k^~5sC$({@};9%ClJu&uszm=`SF+ z0~cf;FIrwn+0koDS8=M}yhR%3vqG%Dnm2B_L{1!DG-Nj^PcQoqJ*ZDd$F7N0{R$|g zy?{b$NWV;Ni!KwB&kJ{a1pCpWU$P?9@z|-g_Nv&+;5?}ZP2;-Ce%q(79=UAq#>eoe zfJha%L-{p3cYJvuWXV+3a82zuh{SPRH!JXj>JWieYi*U+N-&`~w^wHs0>x(ylpptbn7IAZ)S55q2J z;odr$f$l9;-;{6D@Kl{-R<}B3p!+y=b0Xq^T6A|zdc&*9JR5AIq2oOF04FxtZ~_RRXVZ!tiHt|8pvQ>J`Yy?A<@LdI;p_=YFAd z=76@pkcMu@RAIaFsE`Z@8gJ}Q0ol4v;#XdqDQNrk^7uNnMT2uv<=I4f!^4xXuP9hH z=4ZUoo1eWq^Sbf8x%ki@CA5RgB2Rr|SEidL=UT3cV;C9EYUL@+BmY!rFCgaq7H-=h z<^h-!lWo>|Y2DRYwULzBR7JQ-WBr=e8`ivHbF>RI9i+qme4mumVV+hhrtDimlCw(l zn|3xRS;1@u0eQ+FTqw;2aJctNS>y40lV322dBC-Ca+{i1_@G{G1pHI|=ihnF1~dW=9> zVgnn2=;#RAH!4>g?F>5Q^M)tcFuhQQBR<(r%ABNOca1n-z)^KnhKbmx%(9A9oCwwF zscJUo{uq?iDtV>mc9;6{iA(0nmP&TOCdTxanfH8@>p)R=;ffigJjTRAp@z}QtWLAI zd?p`Ga!Q}jpk*n9J^e)?BV=$Y^0>dW&O&L4o5Z>1|6@l;E01f^W(_NANEesu1tteVh3^&eea(5e9d~XeL(7O)Kf&Spa_ zdM*M%^V`j}V-g`ksCOhyz(naUiE&s7D1Z@ZwB}bq!N*0a!b_ z0n_z)Wh-yP>zFPdxs>;q&^Te&{P1QbvM%Q34P;!2gQjYfxkoK%Z?@Gv9W<{HH^N-@ z)NAW2v5P*AubQR`d6UyyC4*Z=MN)Df?nJM54M)ta?z&p`l9BCKuqx*_aTvuFrgQLx zt@7dOupsDI;GmXqX_mGp+Ns$&LS$_8424A+au+%FINbKrRdjyMc$i<3KR`MIUcJRc z>Lb^DgW4KHKM?3nD(W#2R0m4?m1otVK`apa2Iab%N`=xxmrrW7{5>UC%P)U zj*7>+C7nk42*ubn;rou0N##n2v7l*gC(8DNC<#os&~t 
z43QWYnClp-+hO6DySp}4ZyGmy+~cgdx8s0Q)$t4F3o{>^4dt30iAi9*!5-afDzYmw z{ZbBM&$W)E-=gu4bu>4#9keawA3DB$+CG?J;1zt5oOg$HMMaReOPGgtEB%+Yo7^3t z>n0B;W=Bg8l#b|%<^2rBe6Ac_``Ar5K|HN+Mz9>2)1;#XN@n4~ns#lkQVUdwa?Ij;6QOl{ufzk1aFArD~F|#Qi zaxjOcK!o5Ry(b`Ru)dc*(mSd7!<%t?&-O)z_a)rbEjaUxlKN3B|X8DB)<6vj|)BUia-A_XYbS=N(t(5_d6`?@BUNW^LrP& z!>QPSKop@ZLt8B|fK@R*4oRBso4DkfCk*|Gk#exrHRM_UD=6BLgMg8JhC7mRHTvMR zb1>Y!OyktKyAy(K39*qnJjEg}Pu=CU=i?L&yIWV!*s9|tEou5@GMljQ=h$rnnkQ+6zXmDrNWf*}=ykqmI#sw#ZBYJizcj==GNnr9D& zI$2Jwb)wNm?2tP$>ApZed%k1KDX(u6P+k>8%3s{?ex0%RHvU5Db@_IlaoVav4;+;| z9CCz3(4FPt1-f%Yy!n====DSB{-`)}zxb!?4j)1pqee8naLaUG3k@_w7G_!fc%qtA zQN3Vew(w9+KfpQ!4Cj))$;FUdEyV~H>gJRdGD6r>C*q=xR!ogoE{&$rEmQRR2_?C{ z$MriIW4^2t=eG9v!++`mbGv+O(%d;Jiielugo+A{?x&sIYJVR4apB@Sg|tM^ab;1` z=*RmKiD$3m&t+7a)=!(#YGEHV`G@)*?RmauUaj1h=P}ojzMYi0`>^ffjlD47H$EYd zp%nGbBs*HH4Dabso-Ol&K| zkR_2|0#QC~Z3y8wmC2l!l`Ic6L(^*R^=+e@G{qk|@`>?2NKF?isqdVC`ju|IUi^I3 zs(rB|LawqfjR+%mB>u7wqi@$OIyj!!%wIu{r&U{pqJKYrswxesNKt8T!q0e(-P<8( z4^_m`;_s0A6kLYA{PhOdLwLsr7I=1>LbosU7$m zk zBt4W1Mk?`v?A~ws zKQ9gs2aNAcCVl9EU)bwF-7c`A-i0t67l3P+DW);H#bCMgd27^?y{iIZl`>Ag0aodN10?BWwKUU-?F_;%y@mW8)o z8Xp=iG8W{>p@K+_n;tdPp?yx7yUw3CT|{oF4elC=<426z6P2aM@>`YptPmKF@n^zkh||wGeb#1+$>r1?tG*UO z#cb-ijP@2VedT`fuQEO! 
zGS$RL-LiUO6A-HWGK7jn%tHu_9I$sr^d{KG*xlYSatb)8&5%=T{wpAF@BY^#74phK cV2P3X&QUaJl z!1DvIn~mM~uJgLiJdT54B?T}Z_A6{OG&DTv&!1G#&@jl+(4JsDdknl2h4$ATxIA)H z0e?g*9i-d_UOY7umlsDvtBAz8Ge8GkW7&Pyc0@ya;q>tHXwj6$4Gk@UP5P6#nw#GK z0+yGqNizsp?MF*o$_RP>OY$%JCl1M|J`@9SZr!(e4aw#YafN>pI>Q~J1_I=~x0BL2#PuH7&^I?5+IRc$_ME)k8A-r&j6erIMnC}-;4s^8S>X!@?l zn9Fg{QEbz_W$4@F5ERPKp> zxcrofe+pbas8!}5$GLplh95n_*tKdl?(~=hj0I|7;79~ zUGH%rK{RMv^l98?>zar<3EC{!HZ7 zy3q2SQ49f<2XcQ%6Q<5@F*EF9d6K{9zIRO|M`Y>w z`AC+5X5<0~nK@SV%;yI2=t&Eofe&p{rbOR7mc-?r?jz>)ODsD5BKW2vbdfZiK7FLZ z$kg_$@8>x()O8YJQ0>xeUU$BZa)nP3NNKL9DeIoR7KPQ;UA?{A18e}Aw>cYca=iS_ zVee7GZq7;Aaajot&FHKyYSudc@+jjk8F)yCLhS3})v7-QvtsK>FMDK(i0N5a;lWbo zNh<@}+&D6qVzVYdIIcUg<+|6XL2T&z$?%Z8mh+uf?#?nbUG?=fg|+*;=H@}FC5B&KCqXG>pJU%oMoD-F(VI(^e{A;!=nPN?6aD$6J_JuqOx}L z`eiphQYl*PqM5Lmn@C%lg(D4*5FtiV3w$i(7S`9fwQFA?KF5gj#PKwBbbj@L25Yrj zBYZ)l(UsaVnU+C_UJ+aLkZ~W!OC+06>mU`!N3iMCv5q=5w}r}WJKuyxiOs=P?xGKq zA85UaDLck~ShIU*ZrW!oe(Sy4yd%UX^$1+OaN`nBy!}5XV60$e64c9sbVc#oH?n1Qcp&u(FF*qTBy`Hdh(ot)yWxe!f=71XN?0(V{WPR`5wwZY696eNcdud*p?!3SWr&`9T zTd(mDRuV_324^*pKYNIq^R^|rRv&N-tNFx7QXPh$g*O$s|HV0{kmXHu`d$2o2Rasb}<8o@6K1c-H48P z-iSTH^jZ)tEE{M3zWOVA$t5= z2&bYA|MJ}`KD8iyQ+-S?!9ioUaQnsF#obj)yOS%;ig+e6cGz^;RdT-928MeZ+g5tAu3kErei@mWbXM%vGrU-(FdW> zo#Hcd95>-ACdR^A)oeOxq*};=53=6scEPUYQyaL|JPLmoA#$~_%rUY|MMF(4QhsyZ z$!kmYN%1&V=spT*hqRZNZQ+ZRTk^P>eSb1XJMcgI@`glmwwgAN((eo=bIOYqJpY{O zw<}UNZ3+jbf!uAtK4U8Pcl-OY<;_Mza0UWf{*xW^<<$EelZh2Ub%RE;2~?8igs7hk zn5(o`?d7mla5ct(=NS`lvWo6jlS#FnF;9}1BWZ-Ln&BG1rE^D(GA5;E&-do$rgS}T zBhQ+P^3RXuJ&D`@bIaK{y?Ej~g}5Ms+nIg!f%8+OnE7bO@qPe|g*i`G^sZ=AL2q<4 zUiof6%Qo$ih?3)fdh%;_3tTw(i27KZhn+&HcHXfzZFlT$9_j=fpNUh1tRRY7^AyFBO zPS1CZKvSXUbLoJ-^X*UzSMkCx|Ju?VBLC9$n8!Um8W4;_E9+rB%tM;)PqoC+k`W#r zhL9f>vXPE0ZHb1rG)ELUk7QQckzljc|H>gML ze2?=7d0xeju=cp?dHS4GrB4^Sx%8cw!xqN))R?+DGA+aR2l{Bzz08~?wNe|3*50S@ zFp-5f`(95fW|;J>MFru%e1COUP}BzD8dpyCI`QWir?<73sx-LxT{~m!iS)AQd?TSK z+>~W_lSg=!=6Y5>}plt0-HY??mi}Dv7%^=M=#w&bw`S5gW4$Q^XH_#G-!h+=gyO>1sld3#;O`UrS6_q$Wk`fqYLm6sUZRoIA;UPilM*?F(oB 
z5>UHtkO!%$EeYI>hr+znJ({|nrD^y1 z`Y5Zj>X2R|ABPhL*V+BwQ6Q0)ERR|Dx2W5so~jrQWyp3`=v=zj39nUS3Dm(JHlyi# zepXb(5(!%Wqu^?N4UrSr9mhGqZB?8bYB=>%9ZFJ4z0+=PzHjRD3F!iQ`kgT2WSQu>#Ae)Hua3p(coW_J*^f?(jt} z&<{*qOoyL#`PS^qwz}W&HzG466lhRoH)~|)S8sPvyvQ?&Xo3Zqb@Ekx2$@)35gW*6 zoTypUkfr(VXYZJ;XNlM~51+c|ai~#Svas;l*=`<#e!D_6$LH5x3qt0lzWcRolEbjk zZL{-}wPZ3yq3qij+_b+P#~>^p-Sy*DeVp{q0^jXf;_^`|y|qgfW9_v6^LK(=e$v&{ zLmHQbJ#7NTJ*}P+HrAJ-$HC^l^(&4&($-IwKzjsL!ffXn_IH1deG_?a2|(0Bd&!_{ zL&?rF&pZzN?lxodGD7}ApM%=+E7x_->@E=J;r85+0G+x)o_J7-{cf!GCCX=<6Osm< zt3us2UHb~GN^njr93bENnYo~fXJ8RmN?PGtsc{nQ$iAjlR}VY_LGQElgLK~YNg7nQ z2GNtbAH2V7lMtk@1hypJ+N6r^B9F6$q6;osJDaXyu6MbHRhw-3X z+UmtALyFJQzu>s+wBK{BEM;*e@%2PvAmg=IQLyOoAM{z^!>^?Rjg&fcHWXhbv68nq zukXe0Xr>=HH;X*{&9Ra$WEyaL=icKuk_p)T%?Zf5;Lk#(0n%vd{%#bjxikS2_?+=z zB44K!0+|j%^&2r4=0m-25TZ-G83V#DB3D0758Ucc&gSy;yuz>ofloi@(0(CV;+c4K zklO+DA>XF|G~)qpe^A4)0WN6I-a ztNY*Inc?=*3s1NirHV94W=#^HZFr34@J7+ADT*|QFG~v7<+dDVMR^J~Zs#)uDbCJy z(4D zPh}!mu^1makI{UUw!-B{Lu32`+{A$5<-w8xjADE&u1}}j$TRy`s+A1#;Su zcs%&YQrfCG-3MGZ!oIc3+OX5uAdH4aF3XxoQCs^Su1vDxk~sfnC!; z3J9GZ1#D}DXlHW}3z#H7gB>NHz*!8_F^X2wWn`d>Mn-hx8z6Cdq*!t>*wq?)YxWo_kTwT*mx|H!hGqJ|=+K??=i zhlYm8Tl|+vPwA!N)$-()GqtP=b%<+c88T|<_yQ?rv`@z+42oRgkwp!sg?3NSzP0cG zM_AjS%`c2}R{Msem1PF+ZiqCW^(uBaE8q)G9PLyY6hX6G%%^(?Z_F5iN@7(ap^wor zsy4#I4T_Q{F7?-+K9vb%E!ZngK0O0=5HtLy9woQaTgWq4nWv){{Pw_sL%UdQ!W#}DSc z&~6q)L74Nh5D)ExnLNv)9&fI&L`;V9zv$jhz}G@KXeLe6(Z}W)A8tZFTt*B~+8E~? z1mw4A@Gu2hJV6`QPEWYUmCx!JA4*kCvo+UN(SEi#-)DPoa=9)j@0k6UKE59hl1f*Y z@S`MbTvU+LN;1ad4<8yDRYSVHlfl;)^rj18t?`@B@w8R80hiJBx)%(d>I#@k{k@^u9H5@h9?VO1$=l;H^b8&1^tKoMmsRzwfvq4bNj1-q-Ry;HZ@jG?C6a8M<_bu9%>p(a~ zo5s@>y3rSBYZRstrKqW@WtT_K7MtNSPL)v9PO)*e*S}~PjU$85x}*B-H^g}zqk3cC z4|W?R4T$xE|L4eASqJq@1~_P248fh-=fgn27d2SQ)jeV=sVlRtm2dCcF8P-_Y3E!Mybl>sey-? zoHFFm@s>cUEOyq|QEZLpA@oLIdK3-~<_F43Z6n!~PuS4X*L*EVmFXSLg`tc-Yi57e zf~UE>j7&W|{Mhn9!kpfbY|qkc*LWH(yJ6;%$;#yaXl_j{(q!BpWPd*|xQt;^X1
    jz5AUFKfAI6|%q!-O|OrE)M_u7VbZ7%Hx2qzVjEC zegyayPp}D2y9S4f+QtEUUt9mon$p_G$}2-xOKOzkbk-hQ^Yxt=N6~mIkEqq7Up0G} zCgY-BF8dNv1thT4M_}AY_vYFy^*%v97I>?V*(jV)91nnAjEm~$^x`$ z@{(9ce^+PQ)67v7o0>Chjn(Mu0>`cfCK8t;(!g4Wre$&H5^lx-c_63u?5xBX`4@JJjAv#6DQ%T|rfc zO8`8=&U9oM~m`vg?#)F)t{|t6tsj;`nO~}l+P1AO1aa!LoJPEzblr4pi}c#DD6)9^WWH?LZ<6kHEyE%?;IOD9Hl zulZTi&}ZD>F%zwS1_6}`uIwjrT$k5+=JXpy=}jOjWokdD_^>wt|LA!2*>{AO@cb47 zJQ%#W*?O~_+@>Mtr~iJMrnJtx4rhb@wHwSS;#UJrTGG@2E^@wzAz~|qJ>XJ&knSOC zs}Or_(Yk!}BS7?aGXzo?GrLb?^LvaOeMC83?C9^mZQoe}f`d+$8NC8mMyAknv?bDq z^ennjPE0WqHc|rT*LI(hT$YmaBhZ=70_D_vfRxKRsh4{__1#^EA-QgRvKt#V^L~{0 zRBj}Kg+M4EqJR=hWYdwkx^n>Z4`$C(q%*L^kHiR&&--ke4G$9ME?>UMecsdMYQTHgsWCpLOzUXil4o@r5XGcGGZ_yQN)YVpD z`dOi1Hq2|^CWn$4XUBsO(#2nRJu7gx^WtNO7iC`K`bE#N6_wNY0pcD}iL1bS>-|$6 ziJpGR3dX;K}>z=>v0`&xO;_I-ma{ zzNsjGW?pnY+L zTxYef|7AVt^7Qaai72&Me;AH#f-;42hnVai#&Nl;;}uza{06dhNA=fR_NFLzf<-9& z5{v)jA*dT$n}LcO!P2s=;y`<)ZHqi8W`y1$IK^#wuTLfiQj(gYdT*iV<7SfCZaz(I&;;W?LqY^B4l^P(9s7nK5-On&l)sBz z&Y(bTUo>mMR%eS_xw*|*S+x`|Kbv>P?V{r~jZ{F1R;RFAaTF^AA&s6JG2Swrj9&8dMB$@gUeb6mIsSou86(IZt=aurM+@GbMF+W<9Cw-E5MN2 z&W*S7fQt>1wuQ@C>t1@9Fc^%`mvQ<{oUx?ytQ%5s{S09v|=4h}-k z78`5zPj@Uq#%*$Y+B(5}W&`dW_GInF8Lx0mmw5N~Z!x=X3LCqAP%Ky#LP?$Egww^* zZzt-yq@&^yqR;z`6A>JLT)_viYmG%0nZlLQPyO0eLl&&QLx2{gN9qW|3RH%4&3r%4 zgfOa!`)a524nlLhTRb17hRxdRCcBqSL)m7bGgFk$7D0rm)t&7|P9@n!CZlWfXsuv3JB0xfviQi zZ4+tjYn%E%Oo{geZhU^Lrndf4**Gp$%08iA^r=LOS6e@BH58{dOt@tslq#=o96dY@ z#Sca6V6b_%l_UZn897b3j^RZ=4lipZu65uj@%eb)TDg-2WSkg{?@bwfFuU@25^5LrW?6yK#gnp=x^NIt9cELmGIsP~{BfyX|S+2d*IRA&Ogu07Zy2?8XQ znY|%XVY+xWT*!0Siz23arWUO60vwK~W=QymJ6!M(;?)DELZK)3+z3#i0wMzOaAqxCFYijSbrB`?CSSWFpS_YTyZdBY zW^rUJvvFTl=uMd7g{di{)+%6;x-;B&JH z*JmqXNE{%7TwisF=}#;crwJJLOeq=3G&nfBI=BSXCXA(?!I=N@Rvm+>*Z0Fhxne_;qU9|u+BJOG zT97Gj9BRdN-frF3U&`CHU3N)u2Ho}N!lOWl>#zyG{3M}89Z~y^ht^c3)`E zgqS@4w7IiK&s1{KFCNL(=P(%Xw!+wEIz;j4#)$(X3I|HMAZvXda~V|f&%p`KG%e|> z3+2#TkX7QZnIcYh9b^D{W!+~6mPeF@HaB{8ghje^<pRS0mF zBdS-`JpGhVplF4ad2Fn>O=TmT|D;oIQ*u|KF}k@6=lw(Ha0}6%FgJfF%yHa_(1m_P zFnUmCU1Sm 
zCFxf0p&fmq@2E?zV-I&S#(&3!j4nKb5 zSkAT)URx_%Nc|hx5be$7&kN{gO`+^!nSDu2*HyYWcWSp*00>TeH?22R}wJ}_T(bJZ0yFM!$i+Kdf*jeU!S6!x52eGGREa1N;)O`?<0ckomuF(LKx3h{AnXEE z84kd42iPQVP>xaAt5DYp-S*uvprhKGULr1#wD6X;<{)_o3c1Z_xwgI74<=Bh;*KXE3z%#>VbUUl{3!>zB3#*wUOZ)@oa! z=ol&x#CgSgORs?KqQP?5z&QUe{O*u|$z8C4^mnM7$n=wbU`%*G{q`vtE@MOtV}qz{ z=Q<;^JmiITt^b;{a@E;PW!l?}vzh~^R}mCg&o1;%HUxY$Bg;uy&=|;y!C2zHlUW`| z=8m+Ft@2U`8HcL zrxq}+rME{l;M`#I!f>HLfGxWiyF*OgFsxBxv1UF)=5NXb8ko-(;k&>0aZVWTQdY#pEiAb}kctNK5T4pWENE!KQyjBuh* zhXW-brPbH{DzBH?-}g%x=pRK$Bu{oO18(V-p8Nc+E?WC6=wp2PpvQ#+v?C(tB|{zh zi%>lvET)uq>88j~{x52osT{s}x?xYeP+=zOymxshpQ@jAJ< z#1|bi3Agla{fz?lq9Bfx_d5`S0|l8FO|8sS$gR|FM9Z9%dV*iqUkky&4$z(d-uQ?gVZ+W2`V1&8(aYSZ^fW%r)_C z8o3%NpWyV9{d`!dpX$=xUWeb?k3g)%?Ca{^4f$CA$f8b)-97zYqk8xcAsVxrP<>D@ zme;kJZ)K73Ilq3QHCj3>IxNP4hp`3#gaDk04M^;ZrNkJ`leFQF-^yI{`$yW<$serS z1W_J-Nzn~z6P&1C87PmVa8J96#;M+Sd+mJgXF4E$mY$Gvc);!?Z$T-SLou*q@ zR*d-GqW+1l2&bMP6iI(ozQ#5*_b-Sb6mIFAPFcM;ad$hOO zxfNoc1UdRq)%2R|%&wuE2tbre5;^JXTdsf%uUI-yR&7bs@U}C_9u8q~liMpY1gl*Y z<(Xz1xk>V^I|Q@mCs?eMbf}FV-T{DyBiS5@xmiZzwBpCrSNn6Fuu#PhhFpOTZSvKmo)f{v?&`vnUdj2Y@8Nu*9GP34t^l{ZW1IS zrR2{h4@qcBX(72SeV(E3<|bn;PQxSn)S-(;H^&%69Mku=o{p=5<#A6cbBa3zJ_}so zVnW01vKh`DQ8TUn1gg6BctTR4WuSbcF9vo;>|HTLH_F332TL$lr_K|RP|Z#5?=RP# zDyC~=+LK$zY@DLAYl2p(LeqlIKmrY&o>B=ggD`Ohu3kA90`FJQ#2B{x2pcBp6j5ot z$<`|-5gLnP-=q*qjAp5#M$gJ8N@>7n36XicH3lr0 z{{d0x6LCfgxsdCOS+Ol7A3-0{XQRjw=?|p6#7{ zGnG&2)oV`Ewqe_fSZC7^zAn&Tx~*5|o()vlKM8$_=}&vts{4QxCDU{to#Cl0d>wCR zWC=egdmcFLtoYN<(f?uO^#<#UNyJRbHFH=+1yJHG{fkN=t$6DiQPqvU$A+UI(>pC8l(~YEHG<8ca2*H0)K1Q;jA&R^R$h0|3v8{JF21e zjlZbk0uH@!X4!`hVLUch2cAC+=?%J;u75F>WSt`$jUM7npA8o z?wD#)6pc!hw5|k+oXBj@65|out2-5E=rE5f>(}IcSqaBo;xTmqk z0mN^9OFFE;lee}*U{{q6hNEux8O_QmaixVIB@MC;z zJ>|Ju7)XFPK^-q2=qbUHWa6fMjpjWpko$)4bf<{}T7r*{nIxadUVm>#H#fE0F zUQz#RwqwGg7ZONSK@?2rW;PXS-HdYZHz3!b2a=PoHFhHRmNxp2-iz9hPhz{s_&AZ2 z*sf_T9I+e#rhLZlk#Kxt?=Bl9boTq!`0cdx1>Z|L0U=gkdhAz7x!Ij{up2()8e?-q zu8RO24P##Y#6Zfc+AX?#qoPxe&L6Q~f?`68Mf5E57B!uxe%occ@!0meqxWb9Ca%3X 
z?TvKPKRO>$z=zP&-RgOiH!@a4CD$h_zme@#D!rvwcMV(^`?93@Rbp=xTj|F8x_*9J z;BcVxM`&f?-mg$*K0$SvY&{$qY6Y3qn`)avsK$RZLu}HRKB$zwqQZLlNF)!~^~g{d zjB1Zg6&C{wpZ=j~@sA?BceAeVljHY~ek{p3zy@YvCT;u=%0Aw$f;By`BO2n^VxQn( zrmM28yjAN5Qrh8N{21Z|V^b>eDh{=if=s1E@;wL1ZElTE!vFSty+MBz;3!Y*RQCFQ zpr>!DP$iAqFD8eQ2*q$S;-I*6SVQc4wSwjG7@w9TzYt7C#1r?`s~$}$LfJ|~ou(p- z0Dw1^P_e%JY*y8%AKjzae@^@86-Jq{&LfRh-!mc826WV`*jP2h*u39cbMt=d;DxS$ zp`enLDYH&k1CtSs;Mv~QbM=L@=8ryU3)Xt-BoAxWGwh+u*H%b5WCe%Lk@{J&hoRH_ z9W5&R-=gK#^7UU(D)=clQY(}?l&A&0bG|b-1Vv!@UK-~us==7uWzAW->~rc_3^i-{ z)=frzE;bd!8H}z=TIDQxWZLh@09N#VE*Q(F49LxR{FOv!Ygv?xb3TSbz@im{@mvOb z5^XT(of(hJZO+T$2e*>l&}}!O>y0NiZMd(hNgcLklvC*4(%jRwYfZaitrjr=`WqJ# zP+}ZC_Aryh)zIqv1X!Kb5S! z;he$qu>Wc!=k{-LZT#<&BLH&O+u70OFxlj@pSQ_Q!0*7Soa%lB0b&IgBNs#eh4;Mt z8(oHC0KSU%jzC<>7fq|4Ej4_|>yXm!sV;tiD_9AQ_xxanSYd~QsaZvu)7d|Te`Tnq z4)iC?Z~FubrE@R5((@3RhML{2o|H56Bf)eQ34tZi&-?7!5RCe5;EJ#Whfsl>hL`JK zr2<%)tZ(Saf0}$%5ubP~H_0s%;+juH`9`S4K=3K45sKlX3u1~V(y1?l0e9;4>tu==V3y|j-wNj_3AE+U2SvUr!lkR zst~JV0_%ZxZ!fv}9zFDw$?6ld)NMHVDd6(Zhj6QlYWWYbpbf3u#Zh1g!(cXso8=6z zTPhyobr=J=z+HJy0Q;bZY^UqHTviw68L#YYCQy7OotO`eS=z*1Xc}xVn zj#tX4E(^x+{*Ajh1Wzy?8)H$d%&Fc^Fn)KXX2y*sM!rP+8C2sLPqVr;b2DQb>Bbxc z=jhaO6ILpG`{h=00&1kgpQD^ABun13{s6f31x(QT#N!k`H!W`+uo5x1Lypl~M`qo| zT_5xs3;Jb|Hd)7q;UzS%Jk5nsD(;1N-zdgH!DhRrWRuff#6t`O+zUeSfnW9bxfg&o zz_AE`3hlDX<3^(czHf9{nXx_fZKU1z0Ve?L)33f$GY&9A-2c{VwGJgS@lx7j1Vf+M zkKSx@lVedz3hvDd_bF4=-7!{MCQCk0$9F?JGP~h0o58WZZb~YkUFh8uas#iUazC>P zDE>ouxBOSZg#eYq?x|Vav`ByoNz)vezOO>*(D!V{eZSZCOgoY4M+^#=qY9bo1Ny=> zQ)3|_{Z6~O6>mDZx0S`FD*HE}ul7Q@>Y9}QRnm;;O>QSVN3S6IPJ;e#icJdAbDBQ;%P=JX9 z#3%aF-BNE1Ep&la-f43VfY-Nsn`HH(f>%+LxYk42@RV3a%P#go2V)*kTd$-Mnq^x2 z2mHb8hymZlaz^tgO!kOen#ys_DG!qHi~cLv*TfHc4{+B(6agh?4l#SX82?Ebn4$7T zmViWsUJ1k=KsoT|7kzF%Qa=~8$(TRdycI=ESDXy&1#rUDDl^uv5#(&;E35 zFvZr^3T&SkXm?2i3W@(_6aK5RAp^^WxQk$MU2GSgGhn8i0(|Qn4JPLLN|ZT;w>8_? 
zpgwtfQq(wDZ?4+qgw243qq6?juVX(7be64n61n*k7x8nfV<%v>G04%20Ag(zDTLJ&pQbyBoBoxqb ziG!_y!V$)~BSZBC9rMa>zXQO;X|Id|D7*`jqpxiQ1wP~;#i8uXN(jG5#d!PC-qx@p zU<`R62ayn<@6)5=e0^mLP>fJ67qz2@0oflV)RxB%`tu8<8t|^~9|xHq{vvP+95jw+ ze#+@sv!Y_{{W4YPe*9AGg9TjA*Q;r-DA&K{K`!A;gr3{iATNIYap1t>jQ2;ZdQ;GC zkq!dC^MpNEJV5zw>xK^*5zpjgrmqG!B;nGfzD0i}hngULhH_2lY4_m)Ln->`LQhl{ zJHAPSJl|||5-1Ft|Abv2YU?y<5#Xb{JZWd^StVW3TfVK1S}KM+kT?OTPLUl`g!YgjD=)uCWgAZi{j5iPOsA2H4aaF zuLkorBrdqyu$A~&wn*BNK?HtKuuf2#!*AO4NH+_DUfuz`YVDO-|3oA<5`oY}ze5jYrH6I1ae zJ#8{6)LY6;e2Lc9Ud5ISZYWrsM_sGr5z=tvlF^N`@rm$E)kx7|G%iceXRb%ihS`sy za0vjYWmA8X{Y2okB2q3Nu!F@57ywzApfaD~KX3L%;qH7@d94BRQk|9wDtzp&`a15P zpZDR)?vQlqbM(fah$FxrQh*{j$_pd@7|Bu+km;{4vbt{R5xc_UIWT_vATwF|3gp}X zip~sJyG=!(>5AU<_}J&&7ixmU#{HdMirOk_@WrtLBi+#k7b<#&K}bq$)+u3=M4N_` z-&i^IlY$p5#QKkAZB4Ji9E$MJ$;hfXp&WsXjZ#`STb6ud)iR+WU_^XBy)-Di8Pk*b zXt`ux14OQPSq+oTlZ5J(^rM4ePWg;Eom5k+)5V=X0x3 z98v`dHr9*NfS#-pc`XpcP8RS&fV)5M!oVHdBMTV3sBO-QzC@0QdgG_*DS!_zuhJB$ zRLJTO{4yHvwS^8wUvfCRaFh`>$ryyLs9he-jFeqIjqHc!$VU1#EBWHdV~Z`|aNJ5E zH?J>@dUE3BLPa}YUgsY8pI?11NxYgXpP=E|wU0Ai-p}dB56yj5dd_$gEL9Mpzsr8{ zovbqFPp@^wg>I;p=$n#f93`a|7#_MuSrUAWnF#aC$x!BrR3Gyca#PjYauK%H^8|w; zG^bWZDv{=wPok)uX>!wra@euqp`^Av^UW`=H)`k`vBj^o_=R_*Q*#w=BwKOiv4x&I zC{j7xJwWkty>6uvx>5SE*M`t{`cr^3=FmqBN`6-7=c-v6xqZ**-)<;-O=1=7W~e1b z2-&j~>WQZ9`J%yHfF9J&X#TN5^PM9<50m74Of%OWKba?B(}Ql7l<+}Og3?S=-BPJ| z5CxnZoi!x^)Rs$tm5()zthQIG zV=Kptwsv~=h*Ug&ywdRI_aPpL>8P1AvRRpCF-gCfM2?=qo7AG@(Bu)4cn1)ufkiXL zR@1C#_leIt9hKjeJdgdby#JwHHu6$yI^ckhq<^m#KNb86<2L**_WOTApjmVfOrQN9 zSqpnYN1uh3PeF)4PFqkGP05{X+6pqy*)o2?qZ6oj2TLq9_bOaWTYSdc0o=#4b$h}M z(1=f>*hn5KmhO@yq536I969j}>wgy1rDz9kRw)~fjs9jPRNixT!vV?aoX)4hS}qD? 
z4}9!KX#)B0e9p3MtADtz#jG1SWDb2SAt!!^yY`2H*hH|tKj%Q5cw8XjWn7mT^Og?4 z4^!H!ycUqu1$2OA5if`59-v|>vFa~QUUh!02%wtD*E*K;Ic+=Pyhw;-Z?BQ5+mO@_ zt_&$6kOW!$78C-L0X6b=ZZ_yZ3n<*!nRFVhv2wjWoaulWggCBrnh}k!dRvL-C=|%#DS1$m0#MOoluJ_TbigWoaI!2DUtR8`RBx^|9rqwc#x7%6&&PJn}d%tCYP_-5}r_m*jboMshx8|eaXfvJ@#=0B&mmt`HW z{-&p7n0}~2cOIPt{5wr7>gG7LDB|v}+jZbpbux4yl{ge>s+!Gwk){-j)kO}P6XOWY zJ#Z(>enBCxQ){m>oWQn7%*3Qz;|3s!?kysVoJAk$TgHXD@`3T~9YyRn_A0VQ#-UIa zbo#!OEdHJ%d?4-UDG4_t5OJIA8oHaO0OUeeJDM=b7?y+uV*lmFzu>AMD5op4yD-PW zEp!Ww?O?0e>MU2sFdQPfWs+v&fQ#;Y_-=W{8>oZ9ErBJOz);$>n`QS_Uub2uKkqm`GQ!&^ z)&r9`lpWo>@n>%7d+m@*Ki#p)0Qhm%molNX>!-q{H5z_M$1Wx<9Z3pomRs3PP4@k^ zIcf9M*JpiUgCBq0SQAl3_FC6zbTvj$pM&GaRfJaq`)pIjp4-$~y04Cm7?D!C_0-+c zcpbqThZZ&iDnO0b_3X~Im}Vc+Ra%kgPDdGKEo6>p7WtP* zfl~LlzlviDrdG|Ga@9_lAX3|u%jSPGHFoN9-ywb|>m$F&Q&NfrS^E$fG?5{ZxPD%Q z=v#f2L@bi{(=2&_f}&l+SUFHXPA^HjaN@cDrE~=#`MZ_?M5;Zg$^PG!fC?-E1xNsD zwh9m9l5-NnE65gk@sh&IP5DoVgWtsmsE_`uycRLvR#^2WMW;w8D37cPK)?nIN_^z9* zJ^ZUiq*!zPJ6(qwesZt5$FP3)gtA2fK z6l>A*J&(@OI1opjBwCNk5#h8;*^{<#HVr)wuiZed)Bxpg4VMF1ETt0Ce68j>e1k~O z`}9^)n+}6pjxZ{xd8TD4o?y*1eNd)WW32gkkZfa7;@Mzi>UkPrLLdcGCvuU6_V8vr z0CiDA6F{*LyKL@bx*5liV)yaruvi;v_HV;gcNCvSY`-hC)#Go(vgsklLFLq=xl*sH zyf9UuQNSj@KO zeCE@i-}0TzsE~4GG;z>@IrYFYd=|@nYb| zy)VR|DY~mAePAw7$*g#fN_pfVGUUd+xC0FFo`uq%0__WBcT}u>IG)~TrzSTh^hz!z zJ+`(|(;m1J9x|I2F(7R_=95Do_=S9hrD7bsYXojjeek}ieBw*Qk8(gJ5xOLq!Ta#G zbY<1{`?WoXPKp*=qTHswyRT%T>RJ0bN_MpdI1dl{n5F`Tk)?@~CR)tr<==c)#{cAG z1mzUA9itMXHw25u2rX2)ySt6MK-^xAywvfm-tlnc0+lXt`sWIk$;!kKFsTb{JImI; zoLPjH7Oii;Q&nNh2FX$-UFCKd%ePvBa|<9UN+T$zzK$Ki*EFpSyDcqLoZvYqXS%`2 za)7IEmoX8IIchhKR7Vn!rrWhkbM2{R3n*#9;8}6K!?9?+76(pOx>BdWH39u@ZYi%1 zj2Ae8tv4ApRn)^9y0)_t1N`+MDw+t~XA@6k35_e?6sGln`D+oJh}BKJqA2Y96r3!m zaNB2@Q8DD4456x4;+jXlX9PuRNz&X$A9)+M@eR=&%C@e#>_6nD&X!;-YL*3B?kcy2 z=)Jl((jS^iuYYBUQpcVW*>;dP!S2$RF#eZ1p_2&;T=MoCHfc2_Xm7JYp04faC+m%I z+#$WA&l!pi0$z&1ErQGHS>4D-!6EQCHU7F$R7`h~vtJpmfZOii&Cyl( z%q0taHb~3AgX^!J5W}**^^CUwNy4>Xwr8?X7Y?f~WYYu>UTZS4-w>YHV(7}VQp&PA 
z@+)rZ>vW+?i(T$wO{j~tqqT5kMq81YjDVw7M)ky=sqv(M%6HH`r$SNZn_E6s7*UFWTeANF#lkL?msD|wbq zQe;!j4dcOEp(0u7zl5qYrHosW?(+BTL>l7nx>!Gt?qTy~fB6Z~_6z651ryN_oXW1o zfb1f=or;L=em85!oFO%a1#IqFZV~5+luA(DAjVK@xta0Gy?v=_&MHo7eM3W#O_@0U zK-iW${j7moNXtj!EjL~{N^KytzMu(<3-pa8365@mttUI6H`{Ia%(ULc_xdob5?Uhh zZm$|&iSCkn_};>ii%G@ytc-KBhF4SPZAs-%@wNStug__@nQDKpe~~TrxvJg&v3*({ zot$zG?We(}CC8-gqx%KG)n47j6~?agJ-oO#YV>!a7oy%T95uTOQQ?Sc*+n@Rkh`g- z_VRrhxu;RRlMVCDzn<)SmXnxzu>ttOsa{ku*rl4?rkYWroSAFVcw zCAiabUqtk0_;KRR`a%IbnFhDXyaQ5tfYetrtE-x6 z>MtYS=smcXTzAWJ4zt|Pp^SgnY$D`J+&}&YLixeFIq2kR+0{sXfYMBzo@>MasBSi*)l`r)ypRoG(iaz6#BTO zAZIS*TD`axU1gOs!Jys1 zl>c|}M1rB5!KRXM$1|E~t3e&@H*S4&urn$Oh`TeE?ivpi7B_}fOuWA?H)J&WbSXonZM>=Z&d@6r2~e2cevI3p}m+=>vTT1({TJ5$DN zSW11C$|E5X7_BDcWG;m=*O#F}sfOIMiB*PL_&KF+b6YjdKmXZo6 zEU#AT!!aJhny8${da^~u-I()Sn3XIZGcQ)VY;dYj9+eU?Ndm(QTjFvF%5 zR^{%_*j*@I1qRCUMYdqsKEKUn6M;;pd;{GG-*=Tm+WD%HWvOOLgZI@JvR+^rHI%bC zC)M=3Gvk;V$BZH)x#yI=o3WJDCI*aO(c2o|vyT617lx#T8L>LD25#b(5cRU)rr!}n z7+Zd!3)~RwVZ4ilvH=>t!trvq^3vLQJg?$wX= zLV?|8f8RnSyH)oH6rFk%Ct@*JdQ|S&NtJcbJTvpJ`V#hGD7+zen(iau%~ z_2f#KD5WfeGwrgQF5jf25*kB6lL$yd9$p!}M z)BcsSWlT^aBq!Vk*jx4A9MnCeOI9{u?6#%sVTNa%nJEQ-k9b7P?#yI8(6sq9^XxGg zknLw8+ZaMBjGDY;>ndZsgMFj#9HY0wlrHW zV8zm72pu^mDiIELo;`taS${Id+jaP&?jRO5ZmJjDG+0pJZpmm?T)c&Q#KM}7jELv$ z29@->Ag}*kv~vB{7;x})wTpZh=jVxR6`|#0KN7oMr~4qaGwa6X^3@aUi%I2Ip4zuNT$`axbtvh)WUTz$Z21XV z?p#g((&yynEl_?mV|S7jM(68fo_gFZvo-5RcLetd_IImgd3PQMrD$6G*rmzMD7y`o zZ7dyn&)QU#p}lL!{_S#&Kp(f3(~uzK;2d2=bY9YQnPCITR*c&|H4!chVo8*^&o*~{ zvqouWuEKp-P`(r5?$zTL%DHRF18oA!zA52q140dJ=UdW5!Jb=Yx!ne;YIhoUWZRJa zRDdq}d3|}#674e0-S_;f1GUtA74 zUgYd3&6j;__Y6PP@d#E}qf3-_^~2mb5Q||VJRk?0^vJJ0OOSh+Y%0HGe8@kQ!RAB_ z#I!4r)^kx?O-9AKxgaFYUD^%e9Ml4S+|tY672Ce>cEld{xJ7TkFXdY@^I|>JN01O- zz&fjFTUai|$@76Tm5P`7Pxgmh={JmQ9mYnCQb6?kv%hmH!X-Je<;7O(anabF9&Qj) zp080t@c^AO4b_kpv1JG3g3_5kx_(H`ifJ01gaw3O;^jmJO|OTaWg3`JgPVRnZF>HW z{Wl`3rG_#s;8rzp>W`Xd*Kt}=%N9v(4MJ+%yR6dTArjAlLlXo9p_=^nf*0i920ioP zO+D@rcX{g9LukcByGx_7@TLHlzb+JCyj*!kS-@nVl3x1$SJN%!{hBV2K@s$3w$xaS 
z2X?2f@J*KV=$ku3QM!V2raU}`w+VbIV0Jnuo>%|8-<`Z)-5F0VK8cA9{qrr|{12(x zN#|GTF?4@^KLI;&E9++MWt*07hJ)(ln!qOc8+=1^vv~>3FMnmepJ8KPwyZ9pqyjyO zQ|v_ia5k_|g?*HsTH^VT=^mGV7M9ELwQD{xxzt7qu(bnI2CosxCj!}NXSBisSa*Kf z`Cq)8yKEQY>FC0dqjL($r6P0|)Nm_;&bM@rd(hZ$$(Ua#zAnq@Ub_0heO2_)>DQ1x z*Br!|3h}t23i?OO5=eGJu^J~sNuFZtCv{M_Nfd2)R&fdzRrf|4u(VZ5ss53$X7?2) z4P}UvTV6=tHo_>Q@Xk`77Fl0$sN~Xlwo1Ic>p;|ckXp(ceJdXJ{F;6k(GtvT`dxVV z&c#OE-I&{;Gj~^L1?ieg$jhQXEOyedX2MzoZCKC z5vYbR)Mfq_&7{xbAs?h&(q|sS7x4kIFiG!a9xkncqwKC4{ii}$?F(%twqE&Oq)%LY z{_EnH>8JD{1A(Ap9M1rJ0I2r9WHqgGvx4tPthJ$@cK0nS$)-S7d!ybbO}@(Z?1X!( zI-oK=fX-0w23PZU&I9Qw#_SJVuvk5#-;F0)9_)m$TC%n%>C$~*-x|n^nZz0ys>F$S z`3wv(JZ-5;5)$sFlx65SxWGE4ck5XDZXG|P7s4Uh6j0O+foR0p?Fgf}LUmRbv3%x!U2c|mz;V`-z!RKFsI$sAv(m+vSf&G zXu2T?h8FE2n(j#T!D7oTXqqqE9P{}zWXst|R_oK!`718Q5G!QSvF7rETsCdCL%Nb)e*MX7(fwU#Ph*`9q-v9zFF-^+k%aLCI^)&!NeA3sKuG zbob*_9-M}qc`U$c$^}l+^G#C#V2tJ<`VU;kKn1*n?ftUzxmjlM8m|pK{bEWnlYwk_ zhKqnr@7r6B;M$&FPObUks&t(WR_2?xj6?<{ANP?KxcaozP}ZI=`7R?kPJoyoFtl28 z%j!;@?hE#a)u*37mvFmM^7r=RgxFzr19N$pdnF3TD6?PFb-!<+`U0wt_lMT}uc`O$ zMVRBB3gm^#+ecW`Km5k>#n$9QWdi2%~Sw5$QV-T_=scu>uVsM}_P82ad`w_0v5Ky+u28HMh~EDdA}98Cuwh6oW_Df|tLj*>4TC zy4)jjroP&!$m#Pnj5NGZ{3dXyFv*U;FVm+I(FNdAKumFnP)G-aP-L^rRYTdHwNqO6hSowOuvz?dqRR244{Wt+Xmd>!#v*p74TXYd)$X(~ zm%!>gF|W7Bq}irg23ez7L4N71;V0&u-_uZZ`pG~IUSzP1&r?c1Dj|^p z1LDH>eZE}_llOQqlwUIa!!Y2YChV(b*?^9A*)gB?Syfd~?U@(}_Dh7r;*nw*n@pmJ z@@?eOJK!h|nZFmI)Z-&qs}?RNj4+GjK->ymZBkPD+&iDyt#Sr(lU|m- zMavraK*9nh8TQnC;UYMVI(EEe-tG2Fkh5B(r&+1-p4Uej=H3O3p z_FS$z4ISjYzX1CXZI82**m{{4gIS1s-JD0sxnNELNA-e)_qU93Nu% zMS1IkGux@IvwPp$UVMHN4!hq#1LIW&FhRPX(&|8|Ut$<5CCT$fV*AV&-}!gSnlZcD zq1Jb002BFI9`?F9_n0Kz1*uzg0UncmdAx!2DPUMi())>f{A@mN?4E!Qw}?!qr>*X4 zDsgI@f)TkVFR0|+iFGn6QdT)_44o()qL1-FXYPnZqKME??9ya#IYuXy8?@cnMf4u;x#k zpWYOwI%upTyNtLm#8|70rR7N5$Z=|2DVmIb%no}z0OdODBuvaCL*p)1R5?G+yVI;= z8^^m>JpUl{qoR;!)X@{t3wE%1y?Y_5ma-NFktd!Xeee>e9rCyo;RoW|VUMpa)3LLc zvd@{8UWI+kGOddMATX3PbV**sg>d|nkmHuh{<_?f7pShwQp!XOc@{FMG@XU@Bc7VnO+6fFiMT6m3{~j(V0o9}ta$H(iIUbm@ 
z;ZzOYjFs$tbqme%pbA?5y8h){Gf~*r@V`fsCSrWvzXlPaawujVFx<+n(+T@ z$=dB{QUzQI6Hf#oa0TC6#m=gr(oB;=#HBrMrvkwIpJef$r9zI^S=PD5qd1aODz0AD zxpNsHWx+g1or&dzq@ku)XJhX?%yiE&(U51D^k(^X+*j6bsklXSQHEDhKZG*mbA5O{1whnr5a-d2iQdC7O8;>ZnyrqoGr z7K!&MrL~eF$5|7BboBJ|%M)U~M{rq$N}w;pz9!8+jtA47Hn&PSt979+;$ zPC>m+b3p9zt8O|0@8IS#mOHI?E+^+cgs{1QRJ;$G6wN0y=H2m#`<9|}4eZI`ur?oV zMkKaC8a{UA-}rZ)=(WB1<>|YUpN2Lf@KuC$1K^PE`zKBXpz%b8_8U5^$_I{2JSo4V zAarxju|Gqjsf`92dLx>LWYv)!MQ@d`)_37p4Zza-+3&!}YZ`j9w&sgN)AvMFh@l^& zWY<%O^8skQ?MoUL!CwF*1N!S1Wy=-;6nX4-2&$o|J7|Le;E>P6mj2+j*a0AS&!1iF zL)55+hhO>vcor-+qK$v6R!t9pme<|MwRi>YD(s^oxMwy0JlmVe2~J8Xl ztW}c(E7cExPChZ!RNfNmGcOBaw#mst69A`~1xO7T1+UsD3Rd?`7DD;y?iww*Nm+4% z&8vifYha99`4Fkf$<%)cfwm=(^$1-U0Q!U0bwlDp2R~SpbP}|1=*`$BOTEBrHG-;W zrlRg|L+aq+jDc<2VNP(sZ@@wyh;wHKL9dbwss?iaYW!C{FwJBS$b+|#&IkM$T56*o z7-pNF?Hc8c94O6gCR!-7V!&UC*--ovbnYw%ArhXMyNILg5)taEA2MH43=DvnxZD^3 z^HGF!3}{o_0k*rG9#rE6&Bz4acmr5&A^@T7Wri9+Av{m}Zwz=JWpLdoQ~5)0mkMcj zJ~zgMA^3a1kHI@p@E2p*`MF;xFoJ!h0xxh85+%^mpQb`f>w`8`iBh1slL(<=flkV1 zkk04A2G%M^Nx%osobQ0YY#^ZzZKydID2~o8j2r@x(4b}ag9U&e2&kT24U%M;L2>E} z07-m45Nyy(pu(Qfx|iAwpdo)f4FMpC5NbU*_dckgKUjIotMFgLUJP}z)c(D`(%dzp zSR-%(a!Qt9Qr%B=ZXNDl!NGD!7ajpu2ApwF9w@n|Y}|Ed@6$joKV{>1fX)ei;s5|5 zT`q|0)u{1+8~m?^(iBa2QM#qRN_`tF=c59kwO(}I7s+`^E*!kHuu63-+c(6jzV~kpm`ok zgZGdAH;Di9QHmxwMu9T~bLj(2NOr5XPxX}1Q4rPjS13W-HVqIOV@oJOqcYhSTo7g@ zj2E1AP{uz8@eJAqE{NAb)Ts#(NIC;$1SnirHUw4T0>XGqA>*At!E#0^RS07gxdPpd z$Ivs7r+pS0yc8k=je8J~Bg_!!0MEB6OV#2tJPmeD2dANIu0L~V!V+#>n+U(QkytHC z^E!WOoaf74SE3+!zsrQ7zq0R|1dWG#HxvQ_BMOJyN0`ZT#)$8S^O10`sL%($zQGUr zr~}P+h4!}v*w&Qz_Pz9o>lg|00`00>uVYr2Sh+~oMR#hL^T6M`AdA%KP72OnJ8YVR zi#zq=usbs?q|IK;<;)ahTFg#oRB>_X)(?&9ejjPuB&?vze0{mhIf?nLcmO<3P}?hn zP4^|GNxw=D{D*j4U0MUKF^p|Zk%f%9MHR~}u^HZ0rR$%0nA{rO%V~!1RtTC#JStyx z9L%l7keA@%<#yO($(FVq_w)|2;%CEaP|#&#e#1p_|05^Lpk)OakSF&O^I8@vgy}e~XMJ+O(&~jItk8dvz zduTM>kTG_$bFGYLWXRwVfRFXy+!5kV)mbW2w-R52%TygE%B+<;%ts%q(K;J$114B3 zvFn`ZyC9uA2RO|cgfWQoQ|}e~M>}A%F;Q2x;v>jaNx~Huz~Vg~guG!YwuC!Pmd+iH 
z2Xe$j<;R6qBVcszAsLab^V?8@eN=7cOx6Fn>+QCbG=kjU|0>7F3Ol0#DgI`m6hRGx zb$8ADY44bte+TB{DK`RcGH01~$|}S_&iw1v(D5V8GTj&P1KI>aavGfi@Q-~tt4F!6 zxjw+^{j+lExYK5Rr0RDBNI zuz7%WpC6-=ieh7V6z?_=DgAwhj<-Vm=1@uzxDx{(ViXlhbz0 z05y4qNQ*_|XJlH-H)g$YgLK#vl5A0Pg_c7e3)j+ln>ly9-0JFx{LvD3NYdyjmyF#_Nh1bhJ=a6|ARfEx}Ov=A|JO1?=}nt?AeE znL7wQH>*V{ykhjGnU1-+%1+I|Qg}^yci;a}zS%($DqOAg3W4Zr>^h$j=Ph{c4VH5QNf$S(^xgrgzo?a!il3wC&-2Y8t zST8^z55LFHuab4B@TRpJUr$cw-RFndbw%cQ16lJXy@NBg(6q~K&UK++sXxYg>sYbQ z_CnMM@_G)D6vLd0-(qmzKNOu!w#6!wN889-dkSpml@xI-JvN{>&!Qg}x?0s?CHj8k znB$;9_E#bP6G0Cpd_pdxAV^G)yOW0Lw3!unSMlc>DL%Y*nVw57WK9t54_3(N5krDJBnb6%( zq;uFDk*--AaUifRUcS<;va#$%kL*tp*2)^o$ox=zG$@_PVbms6$Kf=I8v3SAH$$Y_ z@tQAI2h9Q4;S%>9uN8-3_wpkX)W{Lz@<*OUBROR^FNhy8ukxytISg9Hj1`|Aff=Ti{>Ylr1I$Z%o&(`B z_qHKQAYpO?wd!uQlxl(Gia5Yb-OUo$|JCHZ7VZ|(E8mi4bT-0a%*3CouH?wazaVuM zQX*69SyS6N!6NTF>@}t`X!A}7pYS zas7|~PDRO0>~%m$wdD<4;xOv|b-{r6Ap|irs~liAtJ`6$s^_lQ?P;K0rg=hF1qqX9 z-34te>soP9geWCJv3)F32fomSb2lPe8e9$D&Z0d>>RnaD3R^v~L`X}w{FnR@;x=sl zRv0_j+vz_Oa*Th*(z-2}Ohu<=>1;Q{NBR+{nJ@HxiddJ`4Fau;%-+Wg>e03LTThsB zH^bMWUAtx3&~s%cs6JPWGkQB{h|qXQBr<;nUsn2>2=WIL^Usa^CyTo}OnAo-qPzbI zKuN=HxDiNZ*j*MO_N)1=;rtxV2HsV93H@owB;$t~YX7m5#pZa}5hzjK;V{AV z!_tAUy}_nhwY~oN10%=%lgk_5;{69VX3!Ny8LSnIV0Mr09`%sz{`#GC!8`qecgs{& z&Ey|uDZU4aI#`-ioSDtwqO6`qw;v1ho8}AVWT&1l5R1b0u4dAAV0l;l&Ebbb)r6|@ zxWa`U6K2;Wl2pvWIJ27@92($^MoT^Qt8_LuBlxl1y;jYL2gYFl%YL(-rfZ3jdUO@&HS;;O$Gn-gNq;;w zlh4S~(BqwV`*8ja)ak4Fg?iS)EVRKe5$!(Sv~OOVR;AOGcMwcq$vT+0WQ*oAOkPiN zH`a!}3MLt%?2lLk&eb&c`NNJ#A>&6SrNNK9(Nk=gy&Y*ik-tXPz1)HM37R%4JJ^nD zh7zYbXt}R!@B6a2ZnUwY%a?mZX%9Br_(Hi{oEJ0UBC|iG;P@3XN&m-jig7os2?&nJ zNRBMB?)-GSYAexyrLtpus>*55YYwN4nWB<#SvF z1pJ6_ZWV8rz`=YJYMD^o>?6GuQiyJ_EMk0^qQ7dJJy1|!3GLPMqZfUFf+fDdff#n^ zkRoAdv)v}Ey>)d4GLM=6kC(Q@+5~EENOv z77NGDfh+-cSPDQ1@>{Irw>$FlRNy=N9$AWfG!?yy3|7oY^tXW z+)4S~Aw^a3w#Ef*u{yhhQ{rbRc79MOU{{t7PPk}Mb&KIcwGztC?=M1fLq@T3HSK~g za<7RV?LJwmj!jujOXBXjCP4=ceQ`h?6t|3HV-A-OFyyIdU(!;(409eiT8Bso?&$TV z1tJ6%`-}RurMz%fO5tKEd~|2@K$nH@FEY>JUxPA5w8aNQ|G`&5YuLgt;B^w#{e8bJ 
zg(gPP?O;}VdWz>la#!LF${eYotYu>klrEhHcx6&(akisv`o|8bJ0<8Vt_`O zxhWUYTk(oS45cCWO0bA;mRus+?iF@qy|&a9B{;@hKm5Xx#GMbm8~`?Te;$y&rKTND zSWjf7Y=>?1@TVn%o6FoTBb5b)oGi0UJp7<1LH67KvQi8U{`>$Cn5MeK4grVNbnB)j zqxSKb?Np+}Xcn>G@!Ub<05^tI(V;3PWIX9xLpg*8ltY;K?5#Jv--~oz_{nWvNIgiZ zCF4RRidH7ovD&I4b>)=Xo|w(Kkojdv9XV)~yi&|~9=l!yQS9q->U7476xW2uW+yn( zF%%AiYv0)l%{DuYobS}!THFrn6J5iMpvoGaLCXw^1Lx|<>@;dBb)OkCw>no8-MrhM zmr543njF6UB+_*+>)*q@$gP!8s2umXnXT<~IN-iFLYT{te~}TjHJ=%ew%eSOAfzYX z#}Iq+y_avwL5zU?lAEzC{jrlw?)@>$`z?utwyM2%xHfGq-Yv&DcavUf#VWkkfTGKG zvEC3`ONqi}WFeq_3wvNs^6M`wLtIb$V0M$AQR_aISmm}mWIpg{zy~u-{wd`3UW&(HaTE20c9Jc#MwvB8ie?D1cr7m#^mv$wmEp}{|(8xwZ>uQKQ zdX93x8ViBNSjK?dn{B}FcYf0oB^TqLS<(hW1G1|CRqEL8!7@(+$)T5=?liqp7s zJAQH-HhrQYQVB)&a z=;<(~-~|o)+=t@7hZN6~%QgEvLj?AIHpiudQ; z%T?WHM+U^i&{DR0=b?tplECS+)Xc_1$X0oxOI(yN(RJxzy2K`5wzj$v-$8Kgahr1^ zB1~}NaKK_{Ko1Y)0h5py_|Y}=ehX%8C!UO&&K4w3QYE_?4dzTFM{eY&lMe?lxb^+T z5pH7LG2agqabpyh&1t$dIY&O2j73Hgrg&I#`f4mkmLKei!bvewW-L*=&2q0OH0caD zs$7N!seYIVkKGX;Q%8xI}KyUnLNvR#)}gi`Hm-+55e(l?`Ph)Y?f~tjXty*(e5w<}aFp7?FK%8RiNcS#+yd zg@G-O))Z5RUXF-jQPT_x;t30hCUbo$;gmyQ2Hrja{tT;(D+RUX#j>TV zWCv08)zT@iKI;3BF#xg^z)(j-SBuDm`vMOwTse&7w$=1&y?({wS8{Zke^D4nK$k&C z!PX-gSBp-BN{nl!L`5Q1<`=TdkV zgKRQJUEra2v1O)xEoPOOkbKqCE7D=~?b`Auo7iH%nrO2TSzD zQ9CiXQkU+}`{@_CCYb2yGd~4ynT$HlI4ZC>YtRYaKy;AbTP_;wd5g-+PTDT7uZ+f0FefOUh614g- zVPbHtC)%VfJa}}u6BW{%#HvKxgfqu%{!&pCw;Axv7kqI}Dyc8+&}`J1cMx{`^u*3tyeyVtW>aN%DW3%AdQ~$96Z8+;_XmwLyz&wbIRr16n9+ zN(1MDPOy}_LkA9d*j=WDab3SD4z(mKmhEc=R>Y3J(%xEajxaK*dZT@^u`v z4`9~U)^JvmPAYI^6r_IKxIC0GwcGENnB5w=wOQf3HCvTu2+Z5MpHcAjE?Oqn#vd%? 
z`5#nkl`0dTrGft!^!arwf;08?Zz9p}(hBJ{sJsIn!{*=qjzIl4-~I(jK81Jtx!q?< zQ)v|yTwX8QW^-#qygt1~(bMb_gINo!REg8b4k4=;B8~872@kcR$VSO+c-fi+d94*k zGT7IesD&nCmtQcH{lYW)a8e7SV>>GE-|?LfDXZEYu{GQJ{Y}qlb9X{rzLkrO+ul>C zxt!894dtzO3&q409NJ-KKbgsK`W4lRN>~B@kvG$RX%9DPrb9xUP{f9NO~V459Wcq$ zzjwzYrMoczYct-3I~av$6Z&?5j9m5wyjMX}gj{Drc9_fNsWh&iz#X)@ubuk4tDK2; z_ioJI#0sGMRg616KZdkd%`(Vm8siBkEli>J239&M2#b6YMJj)1Rk`pS(?e`TbOJ*@(k{;<gx?tcClV)N;;bsz zF}nT%|F@n^m#E5P`coh{rgGeo!PRNfGLXS4U&e@`yfPUU@&+Q!8;;%b=#v)sZz@?8 zTj`>i+NHb|(9wIm`9^vgcZ9tA4X6-3uULj8MxhV+#%g~LhJH)FaLb(P*WBr4S#_~; z^JzFY>_#Ti9`0T6#N(VJ)?&=$II;^KP_PAflnvLusnDpXJ0ZD@kjPw+YwP>c5Dkm`4l1Sabo9wtb8jml!XNrQI*Q zdu@QNMgWWng4{s+XrQGPp7~gau2`r~ZNZ*Fn&JC>OxCX88wy!S%j0f1ovz{S13p>_ z74i>7H*5c@Di)|~U3&Ji$xr5({pwO&nk)DKA91t^eocHLS~iePvCIVGJBMNM?WnlT z5x6+2ag|Z8bgNZ`$K|iuhAP)qZ-;Z3b6^yYiw{A_W@NcAOYOi9H?vE!~6NSD_B8t`)gQ_ zp0LEvB%HbV6dL-*K%Aj$CQ`NIyEQ3C$^pAH?oVsA)kSnj zKWr_|c6K6p&92`B?O;J~tMY+-R@B|qOa5GO(C5}(v7ID(S0UUEH#Q{`gWpdgv=a_O z+pAFSw0rOCcTTeD^t`7omk5owIkJp7~pF|DsgIifq%(#z;i{mqZM zfdzcgKE2<{f~Hvcev0;goHo!_vYls}d6uuXVf6N&x$UV!`xR8e+`yWNw%=Sq>lEA@ zl{5WG(%Y-f@dshb9lj}wx}$#qQJL^3aCd^{AEh;Ip~MYXEJ!?nena7S@azB8MZY0aq z!cso%J<$I0l2uVE+Tb7)&f>0I&Y~-7Q_uK?YeF3APUl5x4A3+)8MWU3VtcKo&4CLk zCgRfi?ikNiN3tiagcH7$Q1QpEi6C5^1xs1Sqek11-tN76c)h=K%6?<~?ir-)aDidJ zZJ|umUU030IqJ_57N?p|o*dE6yE86QlzV*UsXNGmowFjvG&qXvo6D?hWUYrJgc#+M zs_gJ?4nR;dm(((&R z9BVV*h+4hsJTF?)mQbyA775AN8-kk=tkkX`-^zlqNYwo0hVhElD@gzI_%nFUMV2?B zuy9=s8vITCA9)E(4kIRb%irSUL)w2SI9C2l>2>rusF40yd6K2vaV3JIC}`NqG(|(L ze1(8t%q@X*1{g6xvYxea>6j-?s=2APO{jX;8D%7SdThw9H(&g)aY`#NYFLQHZgT_1YO48iie2~-KlZuvxR5=x& z9~c2WH=Ny8r=caSR9RKY8bS}COdjy-|Bl)~$ohY}f*trM_pcl$XhZ&o?fnlm{SQg} z|5WOytwV4;z;7#YnBV}f*vSf3 zx&Wj>HD>cI(ix7n^mfz$B zDGa8aqTuVX;AcO0jx#lJ;|uyT%hvO)Z!#+&v7^AWDPzveUu1*Bd!>K(eGz>9{Us`O zv90Bn9WMG~j@Ts+`3`YREbrd1x`{ZxpMW1W8krt|M*C{A-xjbKQ@)R8#w|=|GZi+J z*Jy#4XgeTukKuG^pi5luBOfE{aB!H19*jO*#NEePT_6k#Mx*<(PJTks^nCaSJ&Xu}GnIfmCHWSy+Pd=dh}K{}#VuOrnC~%y{<)CCv|#+>#)%8m8Z)Oc>;8ci!2WYA4z?+?lBtOv3GW 
z^U_v~(iPzMT1|1xyLF0d3^bzhHtNzK^=XYz`<-l#L1uSEIt=IVxSL^C0&#qGIv{15 zR9OHv;!}?2$s5dE97#CqjkUQ7daI(5?fP0lKaIU_h|Cmoa(h%wN zqK>bl)m&8ksf~Kwe*wi(G9w&Q6tb}f|F6|`uEvMP{InS9~(h|V#|%bJTOp9wJ-8j*cba&AMvOmGM*`H+wogT zwl3HA7T+BzG@Zy(#gDxAN!-N$!H)_L7NBoVx4Q08YY!FwGUVlDIN*MD8Q! zX^#q1%|Z)jm5vz! zgo3WxSNGnc;SSk_Q9j8Mf=Emg)& zNM>lNJ!9F5SHkEKhthdOoYVX0AB-SIijM`I2WFFSkAq9jR$wv7x!xzq$0v!NhqS}f zvAq3Z3SMTgSbiV7^R_x>nAjxR(3WCpDjMcIQH5!B#xZzZFEX%hm9t@hZJ4@l^!wHy zR!$-?^B3YO*}}(cj&|@ou-s z+jYMIN38E6e4rNuyP*WQHUa#TJiIot@s%fL$X#y}l>{jBWx-m|he{UKlCK|22|AC^JukPGp6MI2 zsHm-lEoP_sTVSZkAvK7N-T-Dp^RJ+EnP{QC^_XY6%!tU|q1l;&{HilaMp{L5V&r0aftqr#wmcAnnSDjNU>Iax^xb)ZJiwZ(Sa zaoL%qH&;cRqG7UWn+{5hy12DgKC|dE^BQYs;}+ zvE4(_a#GME7SV+Qbj+K%+g|@#S4!dm5)R+^KJ`RboU7thbKh-$(Y;@Kiq!eRA=LTE z!`bnPxv`Bm1Y_yCf^m|8LdER9-#PZYwmua!NSCSh55I{0dZ-2`YuV40%Mc>HP@xq1o14T2#9Ec~>71D)_ zdlQ^@R*Zrd^&%cF?)jKvgmw-L`4jm&aD-%2`UbuAUwOOb5leIO*X*Cw6)dfyt6C0A zqPz~1%w{gd(`2XYE$%rpYO#pMFD2yf^kl<#7^;h}@h=n}wBr_=&t}CG-FJPamBqIf zH4!K)5u2yq9N(k2X2e=Tt-3{0wK=>e%*CC$cTDOZP?ArJs8SDr-Si3zM2GLbvE1+zhwo&jC%?FEi{;0$ zh`lBv{KtgLC-+)N_~utstlqol4mxu84YV0Ss;I#TAhZ_8C#?^T-6s$Mh$Jy=Vz1JJdyrlX@~uF4AG+` z!*^7!ucl39)L(Fh7iTmmJ#Shooom34C61`TUwSE7iL9u>UYkK;^$S0 z?g$;jv$)LWH2d6BXD%KPJ$Y`>_7^{1KDx-BKk;LKtBZx4z4ZDQ{Me$5+j@$MUv|dc zz*37y)tuU;{$aLoqd$aFHVw$`;|i}tzaD-`)6>(=H@d79LwoAn@Yc8#NBSIRR@3;v z30SiSh%&HC2Q!G}X1O_NdZ&$`cY_UTl%Bew<_O+8yq3-O$?8RxF930>d$XX{{k*?i z+1gf<_M24M-*Cf!9OfC(&fnYFeJ68aN1!TIN|AOx()I|<4wTlw=DQTA+*TWk)h*M) zch(bRF-VTTUTkbK!S_khc%Fr|kF3IG_sT%Zdb*o~PPU7SB<+XFBe3E5TOcf8Jg(pg zM`;@E1fimxD{VQ0w6ABH=WMn&*JA##7<5E$;f;3kvqY07XJn$azAN1Pfu~`1aWw*4 z!hqa~*G11p-|HKi;TLc7SbCtvwa=*PZjBSiUd49L9Xo3Ip)$SaD%%NI5QE!sSnT2Q zcJ7a4CSBf{I6j^dTZcDIht#%g#hIIGI?nY2-tN}rJ|U%-yjF$(>bPJKjeC%snYi1B zZVgjca-Oe7B{5Iu7jl*MHQMr&i0@Zw3be+!R);ajrwUDf=JQ`FJFj)8Y3-_&h2>-; zADZ9UosefSfeg|v&YEi2-uLhc39qwF>ml+V@pu4jS#u|M!{L1MS-;fp4m%5;cvtn2 z2S`zOC#RA7NX*`pk)8{>6D|iF+f`F+F~fY?)T>M=_eD0x7c90|dcCExd9-w0Y$XjA 
z!<8hwH+IZR!3kwrln~`d>S96LJgW;z#{?hht_^OCPpB)V3(dY;;uhUMcGaP7xYyz%q>&XSpob&eUx$peD^S-U|T2);RR<5Bzasn4XM%aJ- z!#{U-)*I(~39;OQV94a!r}+C-N$*mftNxUgrk9>anyFM;E^ zA`9pCNmbr#SO4=~IClBY)TQ78!Dh$tO5ma$#lI}TRZLs%I>!IL`Qq&3 Output files From a28c6f1463c455788fd7d212b6cbcd183074caa6 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 15:46:07 +0100 Subject: [PATCH 066/129] Relax linting --- .github/markdownlint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml index 7c7628c8..cc8e0d33 100644 --- a/.github/markdownlint.yml +++ b/.github/markdownlint.yml @@ -4,6 +4,6 @@ line-length: false no-duplicate-header: siblings_only: true MD033: - allowed_elements: [details, summary] + allowed_elements: [details, summary, p, img] MD007: indent: 4 From 486a804a94f1b092d32880782ce45fc4850dcd8e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 15:46:24 +0100 Subject: [PATCH 067/129] Trim grey bits --- docs/images/r_amplicon_barplot.png | Bin 29052 -> 35501 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/docs/images/r_amplicon_barplot.png b/docs/images/r_amplicon_barplot.png index 7b2bcaf7aaa07d1d1b9e367ab7826a4cfabb3dfb..9d35f2987bddb086cec1a2c3694cd2dcd469fb09 100755 GIT binary patch literal 35501 zcmcG$by$?!`aX_<2qG|aBLia4-5@fAA|Oh4DBazN)X<>PAPNddr!+_-jYD^Lck_E^ zM%;PM+28Z|oj>-qFWB$qns?S(&vQR_EdQr65;rkOF;GxYZa$KPJVQabl!bzFMeq7G z;5Q`nVne{cF4;Vjc!-kSLB0t5aMeIWS_B0pFZ9NV4l3|7x}~Iw4GIdbE&QKLWBOE% zC@29BA3;Rq9W_@+vDFoJPlY$4@*`v4e8#gmu?>+|Is#6 zeZy;SVySoA*TN3DQ}aJT%KgvtQ`DX-y|oQ68M%9o^OjeP!%zE@)TV>b{(Jg;u%E~N zhs4g=T~`62(A6;)0m~9H3!%hg_XODjdQuz^=-!9YvqW3b6Ec-)-^rpY^Se#Ghg33W zvtpQ(Q9f^v*tbf(->SUb@ah+8tWG$0df?&V94EjsXjz(1W~2$1Rq|E%R$RlgYTvbN5ChGlg75 zg#G68*47ubhVq0(+e$zSGbfswbw~Y;yyq?jHOy1YcS4;j$4o83`B+g)^T*r|2ouwXCM}zT8*6%xo8<(hs>)NX0yU#>W zAD_R0%7|);L{_FGiqdMfwv-&PJ7`HXe6ObNJ*^53teK#y@Vye1ejHO)B?vC7QZ6*M zTI`|2b~m&$QT)*`+NPS7=^!2E=sw?9^85XocN@^9E@Nq( 
zVSX2D%_Enc+=zf3Ok`^~(j0p)@2<-Tr5kY)$Tde47CyI{P$; zY3g7qQu0U+&6YG5b=+;cWKXc(K?Dl6meDAh}DA`#ZIxURye2UC!04%e=s;G zyV<8#*`0D;5FzD2?Zr~!pvGYVBcuQYtveXsEG#1!H=1=nRf3l7mm7|ip-~1c1l;76 zC@S@zb7Zn#AMlOx00q6$=ceH?M&II|nUJIqBE}%$!`vV1zMSPU5(-UxIo*C+El;ms z{Uiz2Zb-Nx2+11z=#juQ#Jr`w&RwBEfAw90_?cR0^7FV$(Rosdqee|w6eD<9apSMQ zQVb_;@2@{FDg2f-Uix`!H_fvo+3A+E01v+Ka4wZo#|C ze>^graIs_eLfx41oY=%g zR(Ep=+bg+-y(xCwC&ou)JXW&-CfAB4INO}p&>2pq(4u!5FOiRL;IDOIXDOU9t(<&< z#7(+}>~j;{TI!&hpxboKMcwxi)2%r>H!dip&$hDVJL;YE`gmgI(Qs#;*(mko2G_aS zE%(zNOYfUUjC)ZURnFPTuyuPk3TA@4qea*2h$uz-A1(5@yI!H|IeZwRGu$CC|7>P{ z-J)(h=_on1pJ?G~tJ|nt$+=B@X4<#ID_@95?qQg0oqON(I1N2$>G0T34YMyPYCK1E zNBd~U*STs4+iN1mEh1yC?`ZH%`$){XV8D4!{#dxSc2a6ntl!^p`k*>j=V9933pLxk z5pOfH{GyzF;rJ51^XzqwYd+9+bMR)MGBn6uF&e+RkITDEtHsQqFCGhN@d#?Tv>%rGA8&sd z8ues6kdBM%nmeoh&^eU48CWi^(+x{yt(1PXtlrUST=>u_c+T)NxrR6tbs>j>EIvAr z>TJoQSx_)~ypFcnu!aPT--6xzl z7_&T?@7LvWqIZ7>@@@zePcpbEmDr^(3YYUo6|%aO;*4&N5PoR~TZnJGV{3C<6ghcy&P@I0 znt>_=@(J_o<|7ECMY|ge2EWWT1cAcY+D@NjBoUX!vE?pcm&4ZRaQ4tP0jm8I6_mCvs&e@c0v>p85|6Ktj~mXD@RL2lj&I1{WY#NQ zU|1*i^`5!Ik}HbRVl)1YJtiS6-kQ5*<;I43qwq$1Z-2(8w=LK;?**_)LT)VZIFi1( zM?q zRXgEL`{bLmaRQ=C zn+n$%^37~==huipRLocD!{cz}NE~R; zBd6L7$*!f>T7itev&_?j{#mrvj0F)2l%F&p`fOjS6HG+csZcU0uRPPBJl zSBSdG3j>2`C`?m#&&BnfCp^(eb)32~40k0|JF6$qb7n=z_PEPY&oLZPs^j=^8WwGG z87wgfB`%Sr3|QteKK;R6)$-%R#?1g>l~A9Y#6~xEI$g``oxuT+gES|egStC7=O?rR)s%fMMbJ_DQJ9?Y8z}TeJ-=>GZQ4+V zWJGoDQb0cqa~1d2vsu;bx+hXhy8at-4tjnRZXDr zi06j!2)Y*(`l-*mu0dAsapYRDLp?_EEvq7i`#Jwg>`H$M6d3} zC;h~4rIi-cP992a>c!}4eeL4oG#O!=^VK!)@5bJ%1x1vgH+-yP!8p!49}rumC9-1v|skiR|b{1u~xv`D$#a;{CMaF~t5<*}hHG+NtpqAeLWpQ)Ap&8ei& zjn4;nUSgRJ^`P2to7bdx-^y$s2xXfu9VvW$_}ygye@lSNTqV$ZV)WqoaH>W@t{pQq z*!He?0mus~X?&Ae_4U-`6tfhwVW$S9=BxBw5+O{&Q$m@f+Hy2dctCwndYK%|gzY9x z6+srEo263cp5gdnh_QW#oloG6$4s|o3<$4sR-=m4m=!&g^Xy0>UTG5%5y7&5NRLG= zEHdH@z{b9&8r%;FBHn>oC&ZBkl<96XRa_sT72HHH8X8u6IeX5VMD5)v`#2*mmR#UE zs-~pMBm2ddyv;*{Bs!J!Y6}hTdIX_RdD=*n3^LA2IvC58TKCouEa9r-p=f&#r=?568AyFR^xM1iD)_1O%%o_LJXKha}DcqJ$BfM 
z-(*M<9ZE`2B<>_2oTiT#t(x|>e@}RnXKsdrD~L^2n6$qKyJGoK*rWiyqP1R>1r|axuqz2T<|WhZ=gT)L>8~ZNLq9YCJZ@CBHG;Qn5LZH zIpa;vbpR!vl4;q+yc{!mHPDgyZbIsUK-8kK9_DxMj1|KXgUSO-l*A^Zz%=1w|H zuBwAth_wb9^JcAUPqZ&xsmaj|an1@#r_IXnsQdl8?)x8RqJiSHB|JyS7!GU6 zuUTTiWtDs){7N^+S0EhrSghv-KgQ0yn2bq&lO7T9#6#V8?v%&W@1kDT&ADdYT-JR- z6!o7U5nqPPbkXI%O5tNU`fDej-#EbinAZp!uZ)@3P2 zsI47sK_IlBeVMTX;uTjOu9(TaPJ)RL$A@{ND`PMBF-blOfLssx_CDUXR)lq-!sH$s z`OA`fwgQ^cxwj1F9&0xWKpapp3Lh4rC^Cd#ZKi!N2sexK5_G7B9(0N329JAE_F*Y6 zFL=#vj!x!&zQxRdUe{?*8(BpqF&tW-@BQG#lNQl|Se~8xg?h${EYGIUoL#eLF}P$~ zj&JwX+lhWWC3sP^cBNupSRj6m#B`4?B@L4Z57a9wdb!~}se#ke&C@~|x1)Pop)a+K zKV26xDE+mV4ZvdN2Q@vL?;nA_+^49$KUWBaKA|QRawQRWrB~IIZ#62aovmhi@PxCS zWGyJN+V&%I=qO<3kUZ? zCyT|TW(phIhE{rmy4iGv`WjW1ilp+ov1P<`6G~Eyij5rwx29ZZm=&@c794W?TfMB& zbL%zYVCe=m0H!MCSe3q&dZvCn26-^(7DB!7foiF7Rs@_Bdsv3AOtEdC>Yj`KqH(N1 zmAX}IF3F(?ECqq&n*bXbADhTS#TlbCX8n?!-1Xj29EgttHW-{9a}eLQqEla-^x3xL zfT3fh;{hFI?LyV+j-GlwqBNxGC*{>4o|qLYvfO-$&r5D;>V7Gij>)qwkMu@yViz_o z7)%nQTqu8oNH`=SZYO56B^8I1?@}k`;wLHq4zlT%ln{zjFxwBO$Qlsm!NJ4wJ8x9*focQNA`i!P?2S|E4NI#SprV<;RHXr1OZ_T?p^%&MD83J8k6ct6;rn zl(l{JAC*2*JTG`1jw{?XZNsRP>-21lIgc)z3}baW?}X~*LH{aE-Vdja`8-Zrrf2CR z9#5ah!IvSuhd541v6rXS>+yVH3&?UEGn2?CYs%!^`(EYjr`|5Z zf<4O{#`XHBVPP+H`+kKB4k(<1L}KZ0DVD{85Umow^YcM#OTm zyv!~|z|aZAx@LE$~ouyj;1Rw%S$v{@VzfGFTp zPW?Xp9Q~k3&~_eI$7_^RD-Gh)33=K+f0J1I4BuZcC&+z2wnX4=6pGYH5pR|=Ap;ov zM(XX@RW_%k&Z6}!{L+j$YW9Oc6E@Y$QE2|nTF{r3Dg3Z4FX>zCd#(X1J%`GL`-Se9 z01(cc6q9=DhS!{9UiZz&CVq7tD7V7n0g=Mqff>{(pVvenGtBirJ{$|bq(YRXo*P73 zE?7i)Axo4H$XV^FO5}m;pdSb?Fw%HZ*G!yi!v2&141RX0uAzu!Nn0*F+PZ&-DCl=p z=44IeyA_@!7<>*Zv{A*o9LD-_iMR4RIBl=Z2n34B_^KunBMf}h_`dWA{`C0g^31@y zDbxi{u{cs3h<`^XCTm^9-7gf_qw*n^Tp{V4@rQ-F#Q-{-)xQ8m{49+uF( z6_qUl1`k598e@IopU{~%tHfW2Gx05OOl#v68H4zgr*WFp>cJoC)%8i$hI582SDr>vM4xPjzJeKOe z1?mpIRfbQ-7ZeSY`c|V;{DQg8jTW;8_yiCMp8)>B3fcrlHXFSh)V3q^C#FrIdSK;q z7W>xFSMvW3SF*p3=QK$SZAOT#%F&t&PyQ#I0;1L+_Y~y4>Eor_<<$S9<6{hXHl%i_hz0qln1lwoO*+ zqg@HvP?bpjMYG@#T)!^Kj)m*+xTIzdM*mqb5^ZAJi{1SoO~p 
zE~(@~p^{9}E_lt}iAjzx*)+$TMyBJ(&V5^fPtd>h(yOVA#Uo`FJMO{RrdwtfaYn-P zIRC6@YqM|~F!&2cg(3Q5zd%vY&=F6_Dj|jcP=8?xjZ)!~2@yT)_juN?DQ%6O&J;pq#?y&!!d_!?5ZUP7?+2~eRCeS6kT>e z*NRZocmjZxL0|_1byGNIgCT{Zv#aB~^eum={5MN0WCi+jac^6vd`HghNeiHDY5vp* zTcQxUCOv=A9TIy;{fSTZX^{Z(Abv?VexJVB0Y@oC^mnjB@+HE)KC)%ZE=kb_h%j|h z&BCh<+@9pNO}sNyt>1bi<(?L(YlH$TzzDq6RNMpe`)8<;eARQ}4CU)Mii^CAtc-R5 zM2qmBeb6a4YQu&FjERiQ>9d7ZwW*6R&~s@UUw-8k_zL&f`gi6CNZ56L=lT8VeDLCo zo){!_S$1u$11J>Xz{rs9BZn3kQ2(wxrCMA5Hy|cn?IIm-^xcP-5$FSY=H0lwSqx;E z9bk}G{mzxUd&bRp?KuAkwtxnHgtSWk3)s@vyXWFi{9&FK*eUuqwJ7mQ!lTpf5!?=|0$s@>R*$^E?E)V)WO$3Trk@^%a7D;-uP z#%gpdLD0JcPt8OVUaz)l?D~;+E|$g$Yrsft6E$`p}d--CB=x7V3 zCWFrq@qMS2Ow`=lg17Etd^}$fiq&MyFydZ&7;te=`@I$(bC?jb<{5s8ggK*pnWFU9)i2(< z=j|)=ja`&)u?b$lFF?xDoHK{+ff;gbX--K zIDj*Y2AtVlSC4-MyQ!GWXa5E4ZqBSUXzhHU;^!4)U-G0PyoTdYTNr)m2Gf@P$ zX9Y>t6}6-+uKHqdpB%Ci+1}%_MSKk<=r1noN=52;Z(DhoUw#A(RK9^GIz2T>VAk2} zMP}mO4dqu;|ui7 zvoIL0hM&=w0W<%au551pP<+_4{aawL4U{gd?APgJn(9gA4}V{X;^Hji|7l^(aMtxb zaITUXZKXIi*qDz%e z@Oex55fWG%NBf#zH9Fh+KxKNlbXoFJ{_}ag?lIUA2-vL!zC?^7yVAE9sfl^au-;SR zq)|S_8NRa`sIL&IJ+e&u8>a=28tMZSHs?EkHIRoE# z=Fb)RxG=i7e1D#yI}|==9x4_BoQzw5uqi{A);1ji5kvZMZ&C#F1_5(IMOq}R?De<3 zVg(nO3yP(Ujt_^-2CFG@2-wF`}>L3YiS(ytVf~_k~Tco2c;#-gtbPWUF&`1ALrZPFjl$=N> zc$f>#Wz8M5`O#0Cton*zCPi*vH(Kj-d7X}wh^8{GZ(^r6qnmKZS1SLP=$_5d@BDH( ztripvn|d;}Kh1pZ0JD70&ej=E zsT4?4mT8>@V|+Dx_kWA^zfETDh=TUicMoI-Z3_i}b$inI9ngxr|}3^anB1dNVuAq3@Lpyey1-*hWiT zSb=5vbU$xKDM~I;&te7+Y2FB?-gdnp1`Y9e=bte3-d{`LAq;$6L^6c3-dO<0C2dB% zuY-vppI)c@_&W|Jz96%V?WpF9WW;%1AHgpsk@$3t)5_^7)1}Cg`@C6<%37VT#UK!C zR)>_^yL4euautFkcu^%$N2_kw?N@eOJzYuK+!3czICNb zrxO`Ruk6ntaDXEe3ty8^$ zu^obd0?^Tw3{#94KcJ*0_(AV|fsemV5Iji@UpOivLal4ijC-g#V-^pE(i#9zk5S!q zH6_ey-?k?dII1M9|8}^5BOnA_s?dx3QT8+VrR1H{yg=j(sG*gYxX134^4S%jxY~%u zQzgtzID`TA@r^2cfO3>+om#n-c?6WN7qi0_MnDmD-q-1EB6Y}B#snVn^M8JWz(!Sp zs=UOj;p_*eBrw4OtjdzvWv5j=;J=gDs1~ubV5C6@NlIC7&;r6ie`5E#CZ2(+wGO;I zoJXq_LS*P$uVoZS*56`1MG2ta>5m6~Lm>Bd^=?-p5pULa%1{m97dQOnj+|Efo@z$k 
zj05KN%)oFBhRbS0R;7!%bmkJQEnyak!FNx0-9+*fxI9OoVl^tnABl1|HYCCi&O9@u z0wUmbL51_*9Ka`kIDpp44Oeei>t|opZxgju`tX3BE9L&`R~>)R zwkAHX?%Bh771dJBJ}Ur;#HN##LJZ;kTl$utu5IFZjojY#A6))10|6!&aQW~veAsox zYu22-6Yjxq5*{eL5a`7}Ya-b5`W* z@zIO6KZWxc=Fi<_%@j~4bTAmx)2PoPSE ze{w>RHANRT31tKxne2{s&lQHxb8pBmSh2s%ws6u3t{faDrc|QO3^aA`Yx4n82Aqdd zm==Fo+?Hr`FA;#~*rP|-6)^LF?RePmDJVl|MiBs@Y;eLIKq+Dq^E$9RA@^HD@~*)@ zY$&>mG9k%Tl=9-jQ05z{as`-6d8u$QJ<)XM0hSBg*2V z8@Z|TZzTiPLKKhW*0l2w+5_ji1&#_3a*M|IiIEQ{_y>p74OG4$755PifVj=;0&9r( z57w{^PIN}R^y*ci)eVbwM`@VCDpc~1SG=D@H!Sx-nAuwOtsrGNG+vPqH+g4!0Q+MF z0~y!6o|!TcO}X;buZ7drFdgZU5Z!mw<3eO|N6W{rbu$3a2?~5&!19n;dr~Rtid1;H zF<%rQcj0$!%!-7=0yLnNjE|q%@?H1Bd&6>RS!Z+z^&a?BAWiO_)Q^@=G5OY2RY@lP zH<}TIzuaA%c{Wj7Ve3u_{bUqRHQUX_M1qfQJ*|=?KOPBxcoUt(rv3!N{ZQ|!OWfYp zhoxhqqD5~{*xbb8CC?)hVEx#U>z9sf(xZ2E6<_QbW^Ts{A|YrLjBTpJ@DR?KeR;$% z9V1A(T%nH*FQmYV0_r#EPsXK8ZUAbvY8*8V*YHT$O$wCQ#ap3=%47y#`Waf`Bb>rn zld95hvfpn5T>4DmS8X>zgm>bzk;P>aJWqRzoD6H3PLDc3587%E9)fgB!V-+`Rin>5 zg*XUlU7%{$a+=0yf(JjyG5PbzJ1MKlWZ#+Ak2h#ATpP=#nu%4BcCEhS&h2n305-3! zG6AXAyJH8FedOPw^)fAtI8+Iq=-J5YT9myv| zuc5l7_j?|>2LOatbC#3-5XqL{caK~yfH$2;3$?fPO!Z@gx9{>%c-Npmihi}5 zYrgvW-6u)!iR&xhjqZ};dv%mYg6rgb;~q9m_x>4y)pOnMrlus=L3ptwOl@%gvlWZ( zD_zW=CZL@obt!zw&iRCzxp6dI^A(}es#uFX?RRnz#UZ2(#hWDLSCp@)4z>_MUr)Q%`vy#tj1 z*8OJ%36Fv7>i~}+tA}ty<{>*vW|yy|U@?J%tOP>GfH!Jwnroo&VzR-%Lu2vfg#8i! 
zTwwMcg--x*h0#KwaU0$esxIGngW$ znr_qjTNfii2~WS%a7y6afv@Pd>-=iKRThY)t^OQ=MMOlsT?aPvjr7V)NC5Sf=lf_P z^oO)BpmtJjXJS?>i}O-;><{ZxX2`YO5Q%0+O6T3jKD0;aSs|tVORzs ziQ-Wc_AB>k!Ou`GdilRoxV&vthgi44uiB#Msg$;V5)da5m@%14(ZaB2LaCgnSE|ki zmNz-YLI6?f2YfV3fT?;4uXbk96$7(h{uZDDCBXRnZFMELS9N3~Ie;Inq>}|Av z)*mV8lNl|aX5mQBFX2fa^h;HDIgx&(YMcUweUBC)coV}PziNHz?~>bd$jPlc=;oLE z7@H^%F>@Z?*w6of&=}!UXK!u8xmdR^ZT&_Epj5W70L&#J)7d@!_C(L-6D1a~>(%!o zqY0A$6#4!y2him4O1h51ncAN-Xs3~aU=xmQ7q%T9# zpQJiO!5`~k%mqY73%f7mg2!GSAxq6+d-**aC?ITa|Apw4x7@hM_kZ^22=!e$-nk92 zYLhrWLHVNCoxkoDI6Nu0OywLCdt;so6SMvmAI9Mil8iInS;mH6EJ0 zDxs_L@D>~0EVZdLxii5KDIT2#)q`3(h_T{w$rsRlZU1 z(s>vk$yI>+8{AhY^|}T$@djXmJ#bfz(LK-nopB=uZh49@JlJ@NoZFm)v*Nn>a&HeA zi@MVPjVEWyT@@XH`8%3UOA~)8AP9wjDJPGV6&POpB%}FUf*?-K_}DS0M>l{wU1n2*<+_V?1&^XckBUePa4)c$l1GQ$KRx z`h($trhzXV$qdcw0?ZJ?2Qc-Xcw|M?)Rw$)?KX_%{)rsz5&Vqf^qGuaNvcL;VOX3E}RTE#%lm4 z!^Y052dBiqtH%JAI4Wz^UK}KMu9uXT%!(X$0G2`g!meS=kc5{9{2u=K0rYkxiA2r+ zn)%oJUr|N<=9dqZ(NP@*~S?qxZ8!%epxw>hp#^Q^?R|eW>|Y8r{F13>>4h2G-qXg?W|<%$R-EBy2#fR zxwM$ViRfxa?&M~|xUIOC!&&cP>zGV+WpssnQK^vx4Tmfk;6prjTs7Mobsctgc;*G_ zRYr>2?Lg1v;rvE=s!WM8U;rq(>~VH7uF#>9LeJWynXR<=FiyxI+Xl{4i%)ajxft4v0JufwrjQ%Gt$iI znZ=`CI+`wHfpe;V_GkW%RW-3u{mB5O6rt0~XuC{tFjzqkl_Vgv0-oQps>OV#FY{r& z_nrA7ynmpn0k=WR7c?q^0afZV021!5o6oaqRi(Y^@hYg& zVD}2JRN`<>;Td3sIkTVRLE8TAeju}y&oC}V`}&YQ%ke!kV_no|q8%Fe!@#j?sBLI3 z7uWl;ZDLxVe;iWl-th;rx`h)V%BIH{uCcfvs!+I~`bwFKMbLc0*cBJ!f|}w&aOB#E zbt2I8l8*33p~%T^u&!ZfzyFvTGFSZ?G)c2Dv$p1#IV|@{wZXtzFE33#?&Hl84Wy6a zLWllaCk)a6!;(7NX0+(y=I{0mGM|hlw+gR}=~P+;YJf}bU8hmjH$95#Z!-0|vypbu zWcgI(TZ3CNKZTz~V35aneGUA}@d6OePV1^q0Ad?%aY=x+)SVdU>q$%Ea+&ARm|0Th z@9^tF5fh2rslP9_)=$3KMpgBT-6%-3#Bn!-_-~4@s!YZmI1bS9;ess1Jp}ODZ$6O$ z0R6~&1osLyOW!j2xAnFJZp=D2s-2t~#vyg4ClB!`)qMD8Oq#~ZWuJ)+xF1{m0>>Xb zZdq6VTPL=zfk~Aj&kcl5p_98%Y>E4m58WK>4@ST@fMmzk>GcyETA@c%;peBxba;8A z65%V5drlk6X+DGW73h{IyL2FZ7M=IGTY#zVF!~@)wNm=l^qP_1KQbu;hhTS@llA9Z zxb_j>Vpfm-?q(MDuU1ou`t z8}dezN=1)j`)p_UY}5c0L=WIIK$Gs=H1@p$CxZD_H(S!cF9Jp)#gTyf9w?ZU4WacR 
zOa;N_AIF=NZqE))OC6*aTC;XjFzmq%TS;ZO2|mDcNFG?0P9X4JJA0vxHQX~s@ZX4N zE4^X!N9y;C^C?h|wgHRX;B;Of;0mH-3ltn{tn z$xi>!C9$>ug;I&!(PgXBxNEJNcO$P@3_U;wO~>*fE}gburds)}+xKv( zc`mzHlVYGXeAv$Z+-&)7GNo(Nk@J62o&Bj6PD>;2y81V5TE=6lL3Dg_B#(RN)Ne_aMMH4!!-5OBW|x_*vfHn|h*j{}Vw^m#R>W z(7TL=;vRHpL3yNZgxX+=p!Ity!AbeFAG@m0e%b>XRa8YXl$+IevK3zdb=3Z6^5J9) zcSgSo4I)M@FYF7b+p~Za$fq9fcP`~l5aW0~`S>d=)DnaM0!fdP!l#{o?na-$&X3KL zuk^NW5OZ3glZW&wmz;WU6CkO1`A4>tnxRN1XNlvjXAK4xx^qQ;GD-XsWCK`^N%wlL zyB6u(wkLF(9$J=KxkC4wt*(xPqyMKW-A9eBI}~tpApagg^5Z@5{JS(LaKJL)GWub> zqEE&_$a`NqGqj1059#dez`69d5R=-JzLgLsbWOQ1CyS`Z40Xt%9d5xn(N#H?sDF^T_xEFbvD zAR(tiK30uxsu;FjV|Fy10$~U}YGwQQ7xMpTbXlV?JHGMG`uWX6qIxqudd_nhEdfhe zC3%gow=vg=LVxqeWf>?2ib-ky;%J`%uGgKLkE@z%#yrl@wA_}tY!`dcD$Br*FQf zhzr(RAN8Wx9VTEu1oNNPh zW1Plu&TB!%WdF4@FCz_-tGP%Gl&f=?gnD)|*7-hiNUG%OP3LhVG&9J2Fw{(yw6_Cb z`>eH*op7imtAgrS=dI(}&FjObELau7lob0_0cf>jBJ12M$~%_*Wp+46BnWTppWPaO z_1an1`pgAo+2h=;B9HIdXe_tH#J8A%%6+SuRP!dC27B2Vge_=3HiGv9s=3kNH{OUp zxKQ(y`adq)d`|mrkH63v7~l6Iw228&M11P+o(k-V-eqp}Z3jsCzJ_KdLQlrkk`Y^F zyy9fym(9hU;_uFmsy+-Db9tKj3EvS)Ct{^nbSq4bg#+ZwbPY=KvM&_x1HMl5ohQ^+ z1rHhuPT#CJGmPjg0@Uiio6w={_*Pfs$(7bC@)m)h41cK{k?m<>L!+AYv zAW=Ylc7EY;w*JeTQyS1WTc><(d%4f!3_CNCF!V5EOw8V#$r}3v)#989Bs(OuQ+HY@ z)RaMT-lMhobNH};Y)iQLft2nUx|2BzI0`$DjENj~UI(@v$(7d`FW#po1BU(KKh7lG z!L(%r%lttlEn)@hANED7AFg916rKmw*I^5qn1zp@2r|D|QK?8qbRJ ziRDO{`>MKHGDbI&mI0 zZYwR;TlJATlD>rslI%N|{;{9S??lN)PSjRHfkU8?o8hc%P2wr^z?mTo7lRsm;(KW#0o!Y z>Q24s$Rf$>XDB2tolAfZHaw^zP%&^AM1FF!s><@w_?Hsis?qe@s>Ri)jOlUu_XwG; zJr0jMq{Jccc{)FhvYH6}+!@HDrRodB6?qZ~5XiVWTG`w*NT^&ivedneSI7r;s}L3{vMlkRE3aGbTv<9^8=h%*+>_lu!mr zX*kd{;P<{Mw1<#l zz2u6+LUQtJ)Iemk39t^~y5qQBs& zJ#wgNPsP?AEl*T9KMW%RR8iT92p@JTAj;%b!A02or#-nOqA2|0R{=8q?y1mt_-CV3V7hTd>kLavYkJ7j39D8q7_;sd|K8@3E z`GA_E=D1b{#_T=}mnlhmnC=}WmAy0~>L`E6=!Q4s?t$Vf$V#;=d~_Ad@*3?uy>vw{2-! 
zydc?AmBL9;nxv2j$W`$1N=TjjRS1Y#XEZWcnMA?64O;_bj1;NZLJhBeX}t(WINt1A zVX6VfeO1qL@0`cKo%}Y#7w?zY{#q#HX5>mf` zbr^kCwq&s)jpTtO>PSG^;DaN5zYZUQq)JNpkWG@lTmz^%R5OK7?J@sFN+OfThWxOJ zjfL|Weh-7vFH@VNOomgxA%mTJd$6+oICZ8j!1~6rbbq40crTe{I{}M?SJB3?>=#sW z`i*XdyRC@y3uEf;RZSHhyx&>50PoPh33FapxsZ8Iz^8HH=_+x&&ZoXT5cf+6_6}>; zqTKPb%zL`aipXLDhBN`dfvHD?oRcd2^+Osd;SwRhG0|`Ia>hL@`=t-kb-N8*q!=fs zUPkjRQ-n5U{{Trq@ic~;nSiDP z*`yG>xjcmREkUHWy$uj42;pogx2G*euA!D1bJ{RQVKOljT~dk^hjB9t(g{<6KGSYU zF@S4UL|sL2;m2NjY)ja}@^i$sCWPDjA&wj=R4bkE9Yovo-$-f;rX`PO3je7un?vbS zU2F+O!(GuTH?E|e{(aO2Qq#+en-(qDPKDD3r9y!;V6YL~2wE7>^fzV%e9(`E-;w-c zzR7I_|0B236q@3py43SCbVWgS?6_fB$kIk0808>=NTemm6Y;?!+Zq#%R*>e@zwguAEl+y zpIKVtyWK{PfJY4>&SZ5%D>%u>DE#tVzHfQ>ypJ!;Z%K^9*H) z+6n&RrS3Pq9K(A;?ruQ9c$9Q1bmK5p7&g3_tSx<-*0TQ?C?G}$TOXQ@8vf=S{bN(l ze`Bl06Pf(e{Yu~NCz>qY-_l9U$2?!{CoBhfa$1c(HS|DLsgdO$UJ?8mAg;1lQKo_) zBXz(n7O=Wtq}*?h;=Xlm-hK9no}&()Y(OYRKr|Sk7=5-@ErW~HkV~Upn9xVqy)zX^-@0?C}{{OtZ`}{a_Mt|B5WIS z&%vaeFNhK60Z48ZuMdDI;)|_tnKUbd-zrN0LuFD55S2~l@FX3Vmck)r=FjPX?Nm;H%M4jI1=@^1`j zBo==XuX+~5@WH(RCA#U^geF0F z9IGBb0>JHn_tErZ;rq#~h{wQH>(0bHgyv*M_f@xr6BWMO5T+;!rYgK~M!^8Raxsad zJ=nG+gRNO10c{g5xC<=i(Re^a>`C`0{HoCSsD8IW*wduN|7PD?xK>ZSft(o zzb|F{YQF3?>^Ga#Zx_V?tomt%Fd_e6lDcekr2M3MzXTgRrV}HfMnzcsMS%WyO8P~B z9v;yhng%&qJ2=2G=Dz_y@U%5>mlpCS1e-tQ=<}3B-~XQ&OcO58Hgb2Y>Vg15w@!1B zP8(puw!zgjRp9!dzut-W_!;d(%ij@|;gfe^+?s{|n=5|c07wa8AYu;BgpR8IwEjUFbk0 zNZCot7OAnNS^3X}rT=DqI7nIG5gI?aEH15n*-wv?5V%P|;K#+y6*2zijli>TG#=k~ zMUr4&68{f|u!~y|;I+7?WC`|H+|#In^FcWKKSg88=~9cT-Xjm&CvH#I+08x2{aKDs zX=)mc)gwxT3!<`d2;~_(P!A91Ub#}i?IUpeJ{Wk}bohiQq@LMcpDxt*GlDF*d8n)h zzhw-L*9*<)ABgof4N>bpMv(i0-Cf>_a@nYfy!ksnl$$d0jv&O8(P1?>3&M_Q zt;*PaG$Onq{^Rn>;{Vs&dB;=v|Np-wT4X0HRI>LTCs_?6J0c+~*?XRlGBR3Z7nKlF z$0lTNvQB2^IkG~>cKlw~Ih1<${(RoI&*%HO-G2R9Znuu>T-WP*j>qHvSULGHsVMo0 z&ksePf*kAf`#(0Ohiw*_b~0cv@jy0nUIi4c|Dwr--e#M8c|bU(9eG6C{6V0M@#=1h z5Q)}nWB<)gdlBrk1@0wQ%)UvIH$)7A7Q|DQ%|S;Q^$8dW_x$~X7)&%Vyh9bOz2A+T z^MBx#r)X==Ol-R(FFmE9y=(3fX|XBWCm6@MwbxP52FvH{DZDjpU90^7^`W%c6z7^v 
z2w#H}eaS=nDZJ=wwLJlkbJ!u61)3t7>BO;ZwKHkkg)<{t&3e@a@NQ~WY*jZ{rJVm5 z$|EF}v28Vj{5&1RNWqm(b_fOs4c?z<8DOmn0SQjTw=bjw-jw=Fojx=y(MDsQ9`{}j z76>ekqqrS*F>RLF=$D6B1nZw64FMs&NmngIyntQ|KUYar^70wmGHF4qxBTV0)CXJW zn|~nq5loGv!eLFb9Bi{JVdqqCzFS=0DUHH;sn1K94I~5#WK6bz(hRsBGOqi~GvK`L zVH48>RqMKk-u`A^fF|BJ436e@%V;bz5}c*xk;&pC{^ec|%rd}CZ%z`w7C__p1JKQA zrboUwl-X&Ij?OzszUVBkU#GBcaO*)9B`u0rVrJFjubF=@T>Vou3nIAjP5T{Bav zjL5W(t73CeRK-!SfNT$4mAyso%+Y;XvGVg;LggdbbFCNQ3}+N)(mo^G z1k`x)pDagHJ&0M3*2W;E+@4H{WrT?4N|5hpnOicv$hWN-p_Rzp1GuvvheS#%30|Mb zlK1_An!*t^f8!zX>Q5b}o(z}Y8)VVFRHkq^>6Nmelb!pwRRK6q*xJo8&aKkY{#Y1H z)oeMB$FDZ5{b~hr8$;~tZoE0XCMBgez{<>Eb=_af*1NlzZ<_S+>5@B5GlF(FPxP>!3_xp}Aq_DB@1==JmC{ZnagU3DNq-!j<9H z4|dEDjmVHBsBe`_z}P$y@dPF{FhLB`a(j} z!F}o!<$HHX0W1cCd}vvRm~BTunJ5O0B8GPG!fs1HxzQXYM5G0DgrW`;ajCnI=Q{1!!ZA5eA&>E@DBuAXmm4}wI(@$%&e|9Esb77zAZ+n3qxU&Wk)8LoLN zxp36ya<6O8X^IC+r}iHw7S&>M{GN`n~KWmnyra< zO;IU1pltpJ49eky8$zuAwZI*MPy(uxIaBFWW#_4?dfZOktEj>APuNM)Shio@gEok# zzp)W~k*`kvl-&kORR06IT~Nk_Up4JXB?hqM^y{yTh7%HvUoOyrrR$T~p-D6UbUKtW z9=ri(%GI+h#PRHGWC_=)ao|wC_KS*{Xb)g7x5NW1s+j8?IK&l-?>Ju-Ha68H<%e-P zFyI*+E3|F*nbx-E^eXAp;#R2jv={muc2VE7EXR}+x98gPCnE^~1P-#&k0Gv!4iIX@ zm|LJmeZWAQ{W_o%LiaB`$>UEGfzT;6BJ{(LnT1qciy3@2LVK#W!gT3e?k7*UNmn_^_byC5Vau%bBx~~RcPkI zk$BDmy-)JTupfF5O$jtBr|$PXC3r|Z7(3Yl5kkSQ0?n94;eWC5X40li{&$!JpnVrx z?X4~B@ktC|A%M%LX(6UO2gl;U6rkf@_;?Y&`gqa*BOk9P@bT)DD^*lV!KyBz#1d+{ zS_{sTCYorCR!Ex$V5R!sFwQdL0kA90CFLdtg3a^K=EX8xGKVERj(6qI-yv@F z(>O>dEaSPcHs!Cra=B0$LEnxo5#}H5CUs|>xVoJ2u%bk5!^%j`ia^{BJ8=z2F)xIH zuk9_MLD$lm{a?_UfKV7ek7JKL=h1Bs?F$WHGpFRXo<8pJ7Ck#a{QxhY3`m_gVQ0_F zpmfE>8VQ#D(0*|Ko}E2FbKr~CLLa&0K>Sb*P<&h!L)gQwPSgZBYvLbEC*iJ+ehVB) zgf6T$h{r?HhazaT8N4!2vTjqD%*;o}Rl6(xLSO=;@VJ6E3$+G9B+=z9g^OKj=Wq{J zZDI$?<)TP1z8Qp9ru7p(h!%4bGjTWecW=uXz}|8CnW*=P#(tatyx`^)DlVoPS;Gc>$| zPlO+cJ=FF7_)*yv$1Q01B&;Qt^7GbfiHodWDTwlH zSJbhw(x_cP=9_Yn`N7TtT5#qFxX2fAP6;tC<%MVLK)7nL36TAkRSRC0G#@xip21Y{ z=8@E=?S*M#I}koSQ|X%U1n^PRqpo1nEjz}J(cmZDz<}Is*Svr5(?Qch$Hg(}lpnCV 
zz?Srz$DPF#?m{BoQ=Tf5dd8=tepTWCX6ZlTPyFT3qg=I80ouKPsL%L=nC}XJvI1Ud`oy{Qw9{Z+A2FDK%Ki^N-5jjrAQa9<&ivm|5Z=_TeAWW3EVvve8EPsl)reL*q}Go65wEgwJh2kUi0)D1kV(`s zkn;uPasrq)W{rw+fE;A0+_+0DJVz+dS+$o$vXuztV7Y{}J5$F+M#{?b0*2GCQwtvy@Q)+*WyM7+NAPP8=Hq6En8 z*QrIMu`)3b$QtZ@>Kz{xpbau=fl7O7YctubtFuL00$F=_+EPW8v98@VYzcVAA>P1wC&3!0HFdv$-1LqEY@6 zV8IJVy*lsGvwG_6f3T<+*oHm{y$J@Fuv+ll{IZ4pjf?*&a7W1-8x7Ppdn_a&{QMm{}Yg&{4I&3#I`zj#ibu-ACI#KW445c zN1P+WFy}yC6D4kaSj$*#0lRqQ9xC znMVLM2(y;Zj8X$anMr8u-gVUko#>^Dh169I!*la9n>Ts(o7jD%Q=(s{*Uv9x8_jB6 z{i^VArrWWdBVdPcNmS~ z1NIc|HU}sA&O4Bt8_%qWRGuTcR>a`hAZc4PB#!#&!+z#%YmVs#C#nLlj}Gzke$g$) zcyfr(3Ho zE(s_ezx1=N;x^66ar`!KQy%fSN%nHOk7-p=t+z_o&TWPSWT z$cQ9sU1mjW&eJ*-zXuoR;q6Rgy5e42dRnQEF5?`7LjC>>bjily{rdR?zFe^rSrc*Z zOY;y(PiKmA+H-tO!7Jth3suIhKJm9Vk)p&V_xi$X6|wgy!xc?B`u41|V|P1kXmc5v z%;|RjdZJ-z(C#SMH-|=|%HnmBK4;e@cB5h2%W)-0=b9DLhRfonhxa4qq~GT&Yo-Yu z92x>*BtbG0!oJMXbN?#rrTaNx>KSuTwn)^4%%Ryw9dv+!*c3R~otw|=r2H_k$49SZ zfV&h7!d}K>cGw)Ctpz^Wpbj*Rub;Hv9Ke_dmfci3%ow0cxH^ozHc&(b&S`ych1tgt zm7hjE-W<5_v6{=}#gS&&NFT++_@PG{ZQH0(7q78H+x&~OJ35)|kORz#Lb9gg3dgf8 z>Jr+?Tq}e8D+fiZTc8XpqiCD49FZ zk7TM=coSzw_g(=_pu{O=1A7kV17G$75Eca7)a~wh-;Qg zT-EC|b9mNAN1Dwt5fMx|Qb%D|>f~)anc{6r^%j{TsqkDa`O-QC2ZRguWVq@ncjCsh z(RY0;?HzHSdl%f7jAdH9&u?>I>Qy6b0aF+$S&0!pu-0Ksq!R7t3pA-{O`usl1@|j@ z=yL?OXA!w$F1jMlMUwWU-Ju}R&pP%pzZViODKeh7wR{3pbnobHOi$f^J7A4rh0?7G zN#z~|dLFX`*#-y2G7df#lU7hE#iA$p8n#iE=LwS1+obLb0t_%5#%13aJ(ALPDnEX) zAFAxO0Tr3yg@VR{@%Tp5XrX)6*&cnV%@= zbcDOQ^|zA^&YO!2Vw^}0=>#m$_ z%@in-$TSa98&*eGxQphlnN|j787V7fS~LN_hh?-r1konM0pVC@WRF3r{Sa_k;gAat zw(N0_LLylKLt?i@_XM$*mU?DW0Td+0zu^wCJ{s|EIOsCK^yylyj)7Q%vxgzXrTU(~ zt1`#LowmZ4*s{R1&TdaHc?w{!_MJ{~?-?nHtr8?lT{L zwjWLmt@R^;6y~xOd2O&j4IaDNe{~rbdnEuquAW%NxIuDuhdcJCR{ynjQgkx#0^xoe zN4?<%gl6TNdddY^p6v-<^aUqI;YHq0f%2CYM*81=S|B$Kr*9t}zX zaJHQMgKG`a^9zB?rp7|W1-{D+c5}y{Lw@|Z@^^}U{d*v$hYwIGUlaq|dfFK*k>!O{ zr0(}npPh&2d>4{d=GwhzC|PHm-Y3kGL)aoK1)r0(ssIlF^OqaYo!DS$S@~`VFKnGT zTzCNvp{#B=w5eN>pJzo$9T=?aiy$%T)qdt3g^N#95Oe~O8n`3#(~hjHPutYNXl17p 
z!sECvEkOCiPQfbVm%xRf{gK?$;EV>*8juB}-f9cA-c4rUh6~F=rcd@^3ebXwpAvsL5M^;j?40wF0HJin{vr^0$;Lw1?$XC?!ENB2(R#{JeRQ=T|Sz9#1{EAGp`d z=IQxfnUE&jw!T72CAr3Db3|(IVGq)|y|&%qZI4YWgy;6&!P@MfVY?P$9!YTG>j%~B zf&viwar=mdwnh7!X!KkZiqFdN0X7^*ik3>pFy2aB6TirqPfj?vzCS#K=5zG)d3_?u zNBsMy9dwK!HHp}4U^Y<=qYp!yOrlArz*W+?xZ6zhX_>~ZS?N-_oxM;cNj)GCW>RP0>PC)uc+(r=&#)D!)qg?tD2nh=_H5?K zN=#!-+JQB@?)stEZjMaj)=pny(?gH-U(Pj>89U-=81IXYuuU$LKBOUVJKJ)BG-FUD z^^^Ptrebv@lE|C=Km~o)MCh@+p$~U7QT(H0=?(r%dp~EfOpMW=DeQTzX24RN)8LvT5`*YfGkP`*PWnq?Hkwy`os(_3%k87f}xt6Jup?|>TNWpEdKob%g zWpnpdiX^5Rg&&GViF2pkuP&S<+-Syv7|1s;S^hH%`OnL4Ar~6Hqb}^HOvyEIdAuAi z3_WC*$EXf}GHOeU;;~=8E+LuYuy^1h`W~EqGwZvporZ&$D@WIu_e&VFJ3{9Ccgb7G zcL!{hOOM35Y+RbMS9xf`Ah>Tq_*QxK%EnuG3w6ZCrVOXvkxz6PdV_wSJ~`M~YccGO zyf+s+JSia zApC%$A|`m6AOY?#STpULQIwZuA|~ zC1SVX^iAiImX)k3Icgaf9q*i%?i`-&OZPh}EJ8{;<0)f+C97GG7%}&jxCwjFwS29m zS=9OMQR^hb_St*Ocb7tMr;>J#m52ok4CfHbMm*%-M;}M1K4KM?Yb5YZ{mvSz4{1tR zqRdjF?$M57F+J}r!ooy$h3*1d!;&a8h~kYR%nB(i7D*f$^d-&g?YDM6#5PRjzS{QA zxXq#P06phkqXVnC*nIXm8toIA1%$i?gkH8cx{Cb?mae81NyTJpSs}(^9c7m%Ju8yr zabe*6WUa*IOq;$en{kWJDBRcRfJ%vQO6k$=LE51H`_H~CijIoj<x(?7p)f-=g=<;8{ zY&a=vB7`hYzr!xT=60b-Y-_AWI4!!~L<@5si&_n?-^!%yM{4cQhW|w?3~9xA8F!HF*f`v>g3Q|CT{etk&}+Qsp2aWtfH|rP=ZhU z*>|2GwBU`L#dTC|j94XR!{zwB4sX^R9UjX^HEhQux%d0$z+pEqHOrD5YHPC$sP=;w zs_IIaI>p-C6Ub`{&0|sN66VT#T5U#|rwm$U;cpJD^WFWvvdeaM(&OCU!6-0qsHm9|i?DKP%8warBdJ(*P8&BXu%1iaU5<=!eB(oq5OWf` z46AATm%@LES#}&@yN!}g$AnH-V*AfYs9iCx6O!% z!ajaozcornD#5m!sq|~;YzR+x8s(mP6_(A6nzq!YU5&_7(i$9hO_szChaL}8{yRhE_ctrn-?Z3YD|Q+1g-LGiqhcuV zlj!LWo41}F&SK3OcECCom%78w*_0AARXJSkuqi$&K~0TjJ z<5cB$pJoEZc$ZT>jn>N_P0-?O^flqOWp^Yh7pz;W9LRI6Ypd&mH-|>e3(hz(Y{rYn zz4tlCX@Gc7)_Gq(zY=onBJxZZ?(zp)w}~| z?Top7meV(7(o@j10`xhoAIII}z2rHpoPu=P?*Ws2h4SQ0(AUp`kNFbk%t* zVlt`Ge#W1pC17K*9?LJFdJ!88-OS;{hKUe>1W zTWKX;AMT2^o8)mpzh}xDC!(Xc#~Nl+7IwF>L#ew*to?XCmwK#P|NSp)rcdWS`7irg zmWpwI`6khOl-Kd(YVLRzOB($wz#z)ie@PSuW!qtJZZoUz4*_<3Hh+0AILE&yf_GtI YOfNI49vQg8cY!|_lE6qigcaASDr75CeZjjcE4L#dqcN=J96*n)Q6-2HfB?8tFnWh 
zr4rMnD9$0H7B|zW(&SePyqXDfom$ z9Vgwwv-Y3>&US>=c!cUUX3+7G%{k~s9KjQor~%ASYRiFE9r?>)8pEZ7j(&k@{35Zw zsG|I({80*V!Y78Q^7%ZcU#>-wScfZ z=xc6oWcOfGtDT5=}vu|rvz^zVgbz;&fq%;Ld zD$R~`+uM_qYbixesP?;yt4xV^4U%lUEHUm*NU>zy+^;T^+aD#I(tqW!(Z8D-Qi{Jn z+S{qx&1H_btu1M(Ga3-iWj%I~v`^%T-w^8{qbpzo#8z%DqRRzZ&r0HfP zVf}1jK;&*rbwjm(a!ml=Bxdxh2S@SEq6thQ{($WA@Uulxq3;JZdvb{-o%3bSA&k&bkxMfyJ{C(IAD1-Z2#HF{ENaqu zMGnRWvz*uLM@cMBEp@G}9Pmv=ERK!o^C*|1jvIA<+p%>qr0F&_%Ty)$#9eG-zF_RQ z#i;bDc(G90hLICW=H$(Yy&F@7i}Gl+CmHRip8SHx=rbaPjhX2pvbtqo**JqLiZMngfk& zhV~Y}uUX>V;-tU5>b*#KVts^BW_FW(e;QFC7xs;&hK$G(wZHv!OI)d>ersE{Bx`w+ zNDj09ZGCG~&M=}=nQQz+{nk96{DZe%G5Q%N9cNzm*U|IgR0zCHySXzg^FypGfPQ#q z^1G>K(r(oz=GeYLSC1dn5CcmWhZ0AiBL|!GLiCrcRG*FTXa$8rgV|f*=K9l%tgNkk z>vhc-7s5@pGC#|2%X~E=1W~ucninKaZ4wO2A1oQD9_XHLM(1bjDK}v-%e)61#VHYb z-N?5tM5SETb$QD_#vk(xpvyjN&UI=G?TDGBnKfQgg98B)rpninyN@&Lh}^BM>uv>XK+bTk;HUoB6=P2j*#unEoBh zGJ^CAr*Rp=xyzj$s|s7TzX*Mr-dPzo6H-MFN?I~vmeTW!`8KPZnoJK^HShODwy!Bg z?aXZ(wdy7WTo+d+KjYx7$llZ0ORKWwR^!-BR$Nz`MU`)wsOMHpPH*>OZ9v^pi0M3I zNlh8orvrQ=9i8s1;baf`n;xBI2VQ2%OE%06reb|oWm6}XNV`#qa=e56LhRRWrZhy0 zj8Zy&E#7Kx!q6A3pR!(DMSr)nTvjlfASCQWHDgpKcs4SathtA`=4}OPji>dPE|zs2 zQzKY)7K>^OW1at0hWId7q{JDH&$maE<>CLipd_Ib$ z8beMwoJ{+uD7Q^J*4+GYZ;o#)6v&flsSV$1t?zMHC~=#uhZ#hcq2J>!BZzuVt>fav z?%#Jj3pu%QorD(1W+|XAmY402FH*WDwLb2rX1behAhy~D=$v^HKIpPu{>T?HxmF$i zPDMO2`98PpB@We|%RXH})6}JCrhP3rjF89q$59Gf1NG$%q|@xIj4M?XgwtGuwx&5H z7Xsd1%}rHl-d)UIFJiEgvy_i0U=VTqL4TLVxR2MBzAkUY0yC{5adOR^C1m5fGp>jt z1c?O&2HrLd3=Y1T?7UIrS(oa;-T5?X^NhDhXuUz_{7l%x6vD0eJ1iU9?`p|yhx~<~ zDDk7-m>C{qzf)0iwfJ#UJcp({Kki`^ov_MR%b7NLOhsMG!ffhF&u7H4g+9XG zOV)ID0Go(+N_B>ZX+yZm9TSbGW#fG_q%jFS6mlq&(pqt*y}9b7p_<$bdr8}^tW`b{ z|F==~f&(1aDR1(lAD#5+`0lm8DZ&l4ep}-0M_y&Wa^_}`a9RgH>=W~1}$Ry{GRMH@E zh3K^NWUL{+x#UedeA?UI9weP8*0Es>YO@HZU7E_y`(;S!+?|L(DHKI@6~y6WoK-dc zoQ<~oTkSS7@GB3cD6e|&Yvk7}d-dy|GW6{z=Z!?_-KxA7CnqM@?)E&&uslW15TWAA zOvYqKE2mm$Yhae}tYn=n1j+NoQY&G%FvxZZF*_{7<;EO!Q#efM?T_&`#>VK|@+!xK 
zR9PQgNSBomPPGrWn+YASQ=ffjje9}>f6c!cf+Wk;9VM-m!z;>Xk70m-$@$>jl>&^LC0xsN#22C9OS1+}qp8~}Cbe>CqSI1(w+{JvX z!ngS#=gD&9F{gXo$gr$wV;17r$B$orDDRvNqkBZ#eA!5t0hOyJqH+-Y=%{03?e09X zZ(ESn5Z|XIbrZq%`YR?&gwzssW#{XNn@u?K)0Enf;g&U~e)x93eM9h|Z5ch;)qFaP zs&+np#KZU$LyC9d6CTQMCu zx=6#IMQ6O>C7*W-iE-w9O*A;jCBlTay*QnSMmKW4He58cys|MINN{zYHbNZ-;y*XL z`u)B)m;RI3)VNhM34|rjy%Ugbt(0N}PL|@$#yoruuQvuHG)x4VM>%yN$Tr5!yctp7 z9-Ka)-219<=k}9TKg6*Ya?kF%x@hg+n_6{_(J9S0&w1Gfz7M^T6VV|IIbD{%%cxC{ ze6iiK+2VaEeIhl3Mp{nC-}@*S3Ni^;Ll9q^=*@P5(F5ztDf1aAwd!r-m)`3f<=<0( zL5$KTb~SJHbVZNw-mBP)$w*F}PLIq8dfTk|h3UK{XfKkvo@RMx%*iybA+KEGwa=55 zy*>rTrqdM27X}H6L-VTFFUJ~OeARxVCr^EKjQ@@=;usW*AX+>6R9QUxEt8nvO4wd% zlZ2jF^@qUaus*!eOVX|u+7)@LA-@V-vvPJiRk;Z9i7lXV+m?gN6~wiKN4|)ha7}2?yx4V*?eeNKz4aY)Z_?ihbQ_nm7IYE7KCBk!6K;E5E)4{@;))LCX`Jn`zO zFm8Ew=4WK>5YK0YDxCr@Ws5!mJIxc2lY6-_i|JfJOq-TtqpRDVh}*U>lm8~px6vyM zR3#ps;KpRVcRk~n=_71S#`rB!6yORMoa$NIifeCtcGhN?NSaGsx7pv6qZL(5?KYBE zes!MRW473m) z$Jxe$U9QR2^4u22a|4}eY&)c^eMCaENBOxEb-m3*-xd+DvJ%eC*Rju4tCLwmnb4|p z*W;qerh`Y%arz$P{IT7300*#Z%!1_4-RD}YuE19(``mvD3Jmo05UTM^Gar!qx*xSN zZ8+Y()1cZrT6SG%^MXd(#v|{m-_+&rZly$mzLU4=Y~JkCJS;4nfV#h+kny^tDaQy0 z`T{R3HHTKSlpE|HnJG-nGQ2f?p4yyqJPDDy#%%~L-FUFSPjPzZUVgDIp*GFuzBvvm z$`3vsuYAEGD<_BUo|KvM8(`xTW^@_%4egakeh$=0$jJn6j>4I9HW7Xa3r8W1 z+E{tCs^Yl_%1@aD;VgdfhBl{sJU8LJ!TcDl?TgMxd)MR}pY9BnSOv{-ivX45P^_!9 ze43Y8L&UyE<&iI5M+~nX@4q9eLRxzQt|XaT=#i(>pOaS`nm+mY`=BZKZ7t$s#uT~` znoP)0zfutkPH-#GRe;0vCvou~x#DVtbPLwtK!K9-xS%*f-0B55s7$ctUsT_BSdNYz z-OnhD1eY{c2(=m}8}f58JW$3Bt0jCaM$(H0^vbQb=-6C8L+czzKY6D-sc&NOYW;8P zJdXDAI{+l@prp?m>UIh03*4Vday>q-LCzp16xdXEGwQx?I+IBUwBYigQv@I#Hoy2O z(N71(5fPEmWm@l-I1q-7cUtP6**)eu3Rfj_Uo{GWg^syVNyCcU8!oPUxL$Zq!=^RU zN(81z-6dDLb*5(cnykZ$yR|KHs{+V5}Z%XF>=)T!VQf=_r+{ z;=%LoWfZuzpYC0--ruRh5f*R>ilt7Pi%m1>5KrG1@ZpRmP4o`6R9jvv@p&#T3uT@) z>FCntpAW{%F{t?CF1@cp-|nqCms{Mwf>|!Yq1%Cr-h~BDr2Ohmbct8LZT-%`8OZ60 z@GQl5S%+66M7r(glN#Vk1zoc=?MQWxYZnKx4F{2aH78{*b`dx%p+<88HoXPr{$3lK7v!G*aG4)+oqx_xh+2 
z4&2A&ZuvY>VOJONAuQ=`CB9ywYUU09DFxXq(#q7PX{vxW2D2L|9NF8tKuftRCPI< z58%X#_3rR|Hg-%GmryD!&K@;cdkKop-7Aj*(uQCVb7ES3e?6cnQ7A%?$#{RBCKq7I zs*Bzpz4cOidzQ&OvIYJbrQxE=WSD;5C#XkbbTae42AM)i9;FrMsikRIa(Z-VI=MJjw2R zG$tKvV=CbbftTJ}%j0}I66)63cjjG$lzN^)!OM&K{mjDjkRP2)$tVuW2@IrLaipCOnEj{aDCY8 zUDZ(dp`NfkwJebiij}lHE^&k#d&Vx4(TY)cvElaU8A_y}&DU!y`jFZ&;?o>O+THb) zo3)_%f%c5s;uyopl#SB30C%@N=-Y{)M)qP^nmhq-&=s=1ZRL0L#yiHfFK*rgMQi3C zz!-o^P2I*sG3Pi`jzIhm>Iy>58m}nT3~tqU7B=Li-bM*glDEfjNJ~p#7D8|hd9u~;$_t_H21!R`?R|$#VLADP z;e%P1mwFq!_^GGGeM7)lSBb!dYCe9TR%QZaSfQ5CTe!Cz^W`qmKDZ3iQAjz|Mh7UkNH5_af5iLh{xaa@QX_cp7?mA;m#Et<&Vf-vX)o1y|?kB zzGF}(#FbN_V*9MbQBMz?KEg*DnlRXCs@R<@{LV6$KRNwTG>MLp_fZNRw?P{gFbdEtUSiqvnvX(b>Z#k)oz^k&P#ZYMjd@S?{APP0f`$dl*g8@QA zm%Phcd4S8(d`^_l4fK}sFsPL=DbP3FFD~8Lv$5hTN1^uM-Ou9$(B*GDpZ^|ktYsd7 zdaNCmH$rhmq|cLdnF)YbXBb8O1pquHhvf)D4sFL7z6d40mVIgCL2(HB7}#;|O&WS6 zrn-iq|0>CZZjq#Im!6u@8iNbyVxa%Bh9Yi*F;$H2%sbM-R4Pu)n@QHtXcnB!g|pgvp~4BJKiv2xXRbOQlswf;N)&+*o4$FIvld^(L92B6x zeIbOr%UgvL@&mvHHp$l#s+da;+BZ#tA!*F#sMArY-1DdfY$e_KPW(f?NtFO)u-fq{jX%~i=Lcw(R)%F%nm{PIN0$h;%iZXv{>iMmXQLNZJodeU%;KRM%|htwr29YK{jo3QkWkdCXMFydX*&yRCQ4j^yTI*oTQKTQ^3p4P+#G zAieaePtz`rQO)KLTG}8l(JQu$+?+WRNxJBwyZN6{T2vg5H@MTinpcF}@%`-|-ALjN z^^@<@sWh5%IMsWlbFW}Cmo$bm>)v2OJZNh@9ZhU47JQ7_6v!I|PqWxEwC8&c(k)N6 zcr!(%vOixWhGM}Y?&Cw=U=E&|Uu4O49w39TDeE>ReCYP-X!&I@WVzAq; zk3-^;(G>e&0}OAkVsV(ajVQ)R9elP|=zBDw&7*P+iX~$m2sHCQ9QOJacG(AonB=ZU z10vXWHZdUp3&rl3bjY`pK@5-K(krDQW(mDG{-0Z3>6)`i)V|Kg>#kNJwqR3!!xuep z@I8*@9V9X+Vi*m4qTemy;$X*5jh*k%fnSy#3B=Fhn(GWe;j*1VsTV0bbVFd!?fG@& zjo~T3AEog7DWC>(WhKvNv3wlp1>CFNMf|vT(ff$uvz&>l*ZP3shWL?;G!NuXbQxnb z%kJ747LbDf^>DD)BinLzrnL|q?u-w^E}FE6mJhm|9@udpX2xNxHLE7Lq zYm0=fpN{_g8sQPAg!-TTG#{dJhPA@2e#-g7r1nvBkX z<_~f+?wwLN=(eb9pi#}sMG$!1nj>W8Ae@_df7GFyBu0FaRzsPI=?z@g0jt)-^Lc+( zJQ&g7PC!+~;U=*RK{w!X9~^Hnduin|Cb4ktEuxtpjG|Ik`ux|4e}$CeCEf;=-4W+Ce5{U`NWyU3 z7X}QZ=BgV#i+d)Pa>`RgJ`yL#AMoG{b`t@0BvloF*af}rT|5JK~=-=8J4zzqT5x}f2yGQNxMOU-x}z|dkLqIIinx(;Sq 
zM4Kx5vOE;0(+#7NF)UDPv{VSe<0*9+iF5@HaWa;rZ)%Mdc#zqD?qhs}a|uKz$ZH$h zNI*4s#cV)Cdk7d_bp%-2Z^iBDo9ysxNmQA01CaUT3GOLCO2qkiM&`5MsqHk2+x*=9 zZD>m2VcOiH=fib%+t9)K*->*EE@skKsLJm_qkg1ZF*-> zq$a*<__cAiExCw3urIZtC zN@K&>g7t`21a9kXT8jraRfI=O1+a-1ya{}$&Y+aq5bAc11(K9zWuzg?`FE))@a$B> zev+6dFWmCg^w4r~~C+1K1|%52vsB+lLT z!!id3mF2!UQ9d3D5ss~$zbF-tIi^caPh@xh2#1(mmImS7=1XCBn zOtro$>4JTL(cyob8=Q$IW1u{^FmbpwASc<8+MRnmv^X!Q2sZyS_n5Wdh{NIbi}3ij zfz2;&5uxv=JY;nh2rKpS2E^v&(RQZS_MN1NSDs5p#rG5veI!vkR4{2O*tk88NKgeHj^ zs)l#O`T}EO${Z3MJ9Vr}GbW~tyw`SkQnx+L0AYI`baC}P00r;`>S2zaKAmPQ)-Q6h z?RBwPde^q%!Xwm0P}0CkpjmZpR*7rKpO{W`lXJAYGxFo8M|lTR!liEa8kmGzJOYiM zk)^fAGG5xL=vcPG2|F=mUdu_lG(Xcd55a2>CSWCE3`VEtTTVdL$)Ck{)xf@ek%Nvk zEn~@v%D&clZjIoKL+RF!i(&=Oc{_uE#AT6Z;{)qfD7DEFt zTxOw1wxaLF4ZM9hd+PKSq21@0Lg3fJ5~l!pcLx@F+}Wj>n9`GWZZL5f4)BHfvg1&V z+6&$`?)sn8X7IS-1qX@yzA980+w*M=msT=xoZ=;O$x&olf6z>hjVlw(l!sIkOv?#0 z6OM77a;kXIK7HLn5_cykBGbEA=C^{;-FQ*R-{)~db%$3LBd$hQ^ZzUt z6&wsx7bPm(hoCC3>Oj|^?bb#`Xr2OHCX?d?&{pJs_5-Wre9V3}10_uJjxj>a;#%fy zxD7Cuvg!=D6o0K#D|^qO*w-+h5-e|};t8&?$*JeoCGGyVf0^G8+wmHy`B-RQ=jpCl zA)O!eW>vaiYW~K@dNyzRfWnq@Hny2|qxzHd+BTW>IWV4cx{g1uo>J%CUR(Lir<@{> z0{}P&$B8|G?ZyF?a6V*L%;(;#|G};Zhyg$cq)0WdyVqzidi2lP4q=B$2PU;I`0RU> z4)hIHa$HF_Vu;d15wZt;cQ*=%UG072Pe5G%86^X7v?_9+CxOihJ6C>Vox^+(*Hm!y z{f?$c_AxJ-|KhVPPz6L?z-fgk)|5OB*=HdlUt}UX4oQltlWEc;xAmTl&GilhT)-_7 z5dhzoi8w*x$EWh^d4Cno=r(dRFaMHjv;Vg9IBl&uye4#xLODk0=4($jCPM0CI!d;P!bz;aEui9O&3Oz$EmdZ{lAzyFA|vbDHf!K8Xs0PWJWAx%$lAD0xH5&R-mN+XpT@h3MG#e=tCs5f@}=>m=;h<&?_^n6qr-;oYKfS)q(R0f^EsiW~?!MWB8dl6Qq2$i&$H z4JSmpVT;`I8TxxpD1OSVj?NXwFWwc==|`R+$OFU-$_gd9OboC#_)=1AFL;bULN&A|_*gpT~CgeLkvV}vuHXfQpWNNijzq@LZsJ7K1CV>3%aXT(^T2DGo`GpyX|mG%bORjcIT9K?C)uNcFcRTpv! 
z2HU;2o-jBqwD?85(S5_xJM0Av0^7W&P%G}00YE}0zUa@0<@tI5u**LMBrL%UKLk<0 zkJ5LJ_)s6tF9!e^cZSO^t<{i95LAROLTWhhB)8eLI$d?xI|iTp5*0G%<|sA_dTe05 z{0m)iv()gyl4FPpCTID%4n_e?+C$}c2*;m8+*&($*Ew{+D=h>&Tu2BrQTz_e2Sxu0 zS+K|SjWQl8K)?Mqn!kvRLo#UhRkfT>+MP`it+v5=QD5(J#a8Q4!K(w_GKmJse~aHItRTH_|jsL4V@o^>6(pB&Dm`R%3oB$ z_Xim|sJb4jKO~(SKi`J;mCyHgnSXgy$V&E?kgC#hEy|p#^!QPAfGb!$;zTB;;JU63 zxAOPs5zm4UZEmec&&h%Q`A^zEagpqti9fsDOLxzf0aXKsXAO?7ME^GiR%}|`9K)c( z;2C~q(C7Hn0vGB6V7ev$XTxardUgMPj4ki~XJOHIhyMN(u5tPp;Y)9NR~Hfw>sz$~ z?Ol6(K%aRwA-Bg{Y9g8+0qH*!aNfWq6Gr=%JMV*EBu1 z3iKS<1yJ!ckHQ}73eqC8>{W7s85jIRXbp{DvHmAh!t`2Zx;)vX*5pzDvxKN){Mmancz^ zvI~I80G(kzCQe4k=;opH5znXiv3oARj7z=mS&2i%?3)V{)Bs7X)Zl!hOnMfyo_L7< z%q8nDALy0;fuugcNb0`=?KSjBsKR)&k;t&Jd05OkGo`1a)!?Bx*yj_ha|W<`*&&Dy z7oKxD4vG#c^)BIKZPE$hN1z*t2?XaGCvnC8@Vu zS^rAn7b_mv+~D7)@az^aH-yAHfJQRuwwO~lxi)~j)Xj7Pl1zC?_I0()-FdIc`%1bX z?o}|)G^Z{iO8!JOp&49YcKUZ6_3QkeT4uMtRkMVk=sfqlGGZ7l!PVvToirEqR7zJ^ zoqu9Rg{)2rjBoF0k2+OI(_L4lp-O!qGpxJzcImD;O84k^zs91Ux3Z5bDZAxl$cIK3 z8QXpB>krqB&#B-*lEShd7K@Bpoq{nBLjaJ+{2gqmizezuG+#yB*@=~_tl+IY2$BBw zg<~4OSg|LMTT`iJYNqGU;|h#ueym$(N#`RUrwwfGMxKhpKVq+W^hv}$V4djeVBh)P zz>W;MjM?g5%O4!f8Fo4-Sv1_`NuZPMGA{e@uKE^uAdZ9lIt9d;#%g;FD$a0Gua(*)hmiQGld&*?NGiV zIPBS)<}-`;TE0r&G4AM)lPg)NcAqhxwY0y?Giq5uhaU=H%uh4VUD6<}1WP}^pd^Bn z*pP06hy&l0qdHvniF4?Uf&wjfA8>RR4e0&sZHF2@31jO-d)*F-QCcs-d)s6w0cA!tV>ZJe6w1#W)oc zppTyNV%f47x!a8tw-(sz+Gr8#sV6kN#kKA+zj_tx-*e%TchsDVEJ$-J?Z0}V_}X~< zwJ7m?j0fQXF_LPBK5rVqvu@Mn_)(4oa4o<|7(2-P1sFd)CEX$y7xyYP?2E0}?eB-H z-80Ftz6=D=p&$L213gCC5~XvFq)^`lnFc|fhLhAO6}=S( zhF#QO{Quc{G8jv6r^x2V^7!|CtumFF`QAOBqlo#%YmJ3!A(FUszquWbG-tUMhZs&r zC9{`HnGTB~-k9ltX2F zId8${|FTY%>wBQ6ZaYIGuWCd4?t{^%c2-TG+fdo!VBz&^AFr6(T4+_=GY%V{!WTqh zVpz4BW!Lmi=e0-$oHPEZz$C1s?>`{t&AcJHfB%_rnpG&6VF8(Q_exMz0*Aj}CnfFH zJ=39D!A_o?Psm*is*Xri_4a<4r1^^%GZ*ZO*`?F;gW^l&OPBQiFpcg9={pWGEW5 zijik;kIV-3gLGGjRhHB-*d6(W&e|b!YUFAYm1B2wz-A#M)8aO2fn`h3wp;xM@`mVN z9I6Brt37J*5u3R9su_yb{bN2`{zpIXGVK@E`FMmi0A5!bGuR|1-ZGqBnXf6q854nR zIk-#fnk(v?d%T?a^ERg^NDPbXhiq7pF1ak 
zU#fjlpW{E5>+0#MkTY7v`CnP|CY;>j+X$~PDv?JWeD88Z+w+h-f`Q`;>HB0~KVU+h zMP)YFZ#H8b*1oR_J>cOQTC|%xmfO^2JdOBr^3wv^==+n==VM{pZ!Waa0>DM->5&o5 zySik6(68UFaV&bshuHnbq-ahx?6{PS2@rqzkV~84^SF2iSz~Ciez9;s^u;mCMH+Ha z|K)rLl*19(xH%F}5AK0Z+ldaBtxa^c9xxna>z4dYTi7DP}3I#;ZPpiR| zLxRzIp`c`L|FwpSQr5pu6ubk+WC0U<9&{g&L{TR%}&62JZ!{ED567M4u`sBMMj&}|EB z{QqQL$RmfRe}noV z(+)~NqwpvAG;fO)$CIUpEeBI^y-4i3hZ?4bBNQAVt^ z^gcg>2FI`-@Lvy{(Pq>QzaLh}ec?j+25W(e4q`N8sp3?C*`XAPWEWa$(h=W{~*8c55g5tY%=;!rG#^ta;A=hl#~?mWM@Kr2Cs2 zPX>w&NHy=k3^u^EIM(9hI#!%h?gEMtFPl6(hr{+u>9tIY`LV9TR7IUtPM|~ii{nCj zaoKpN3fLtDCKblW-+bvmniutQ8EF+NLK17Y8}~b>WY`X)B+m}Aj`t@2FO5>&K#qU) zY;g2#CPCN2F-TbWf67(Y3WSdz5hhA7T?=`%K6dkqhpcfp`?*s5OIxB+t79b(LaKKW z;tqy#s$MnP`%d{9douw6^^a*hc|)Qi6-=J8J19pgcCBaok;Lzkbw8fozFu{ruJqbO zL6I}DX^4TGm3NP>o}|(!-OD4FtGf1H}6lg69G(A_8f(5 zKkBlz;np5fsTV31Z`OAr=#)s{@ zn{LUM%O6N+x&JxK^zZotz5KuC558Ow7HB=3^<}Igv#+IXf7xb*pyNR50mF^?>X}16 zn1}xmihld?i;?@2HIOt)m=eW`<%QmpvUeHcLJh#sbs5;5ipc;S4AY56$F%?I3;dZj zP>WQQ4fq{4;Eua-{{>*&fSETara?0Vn0lbtI4oPW>^@p4)H})E2AfU5{5`eioKQm# zgJUx08$TWSz+gF0kd-+V-T;$P(O_JEEqn@f71r$@IxJ5XEKk9}h~*!wl&ogCO zUVb25ff(&JMD9DE?K;(+V0TLof}Dn)yY>p|viz%*Ldt$QT=qL=Nx0z{50ZK6z%)lfb_g0$jwmrVpJ8 zuOL~ge`1Lo*D_aI!Y@3r0Ja@xcNW6n#OuOwRW8hYmF*C~4@R`%n1}|sEUk~g z^Dd)?7LhPXX{9H-3xg9tr;lGBWhStJM7<}a1Y!$#4fAaBA?wMYxxg>0ygR2<+LdwGKPG}) zC1_vf66bwb>wO5R#ncxiNCX|abfqj87KZnp{PafH&76T^Avnk=z)Q9Gu~%Hd4@Mpg zPXDoMZlTr6edFI}>1`0zZp6{c3LFm&S-eh-03f#p2=UgdXf>3Nz&fl}udc0iHyoz2 zfgSD+;T-g-{ZMFGX&Q}D0_G^#zjudFuRiX(-cLC4))#Rd&dyu|Q_z1 z)6=lq>)-!ZwF~e|gui7Jm~$ojhxfr(`{_>>a~8|Tx~I_4XuMDM(GQxl1;=7xgOh8;p}@nv<=hhP2{$lF;@=+E7-=W zMIhh>`kw`66l~&|d3FCT*Z+U_T2b`Mxg3!$wAi#cpyDCaYE;fs{wO4Yx8m!)atORA z(%-AVc^N~ptZ;2lSYlKCa%6)DMB zzt~H?keJpv$MKo*?mcX;1fm{)XW)9ue^B3H{55)5+n}-FLx1QTYqWAf$%>TFbnB%E z#Y~qhbWHulKfBfEcew<48S}{D+<-xS>9o52!tqW4dTEj=O?=p-2xmZf(z3^3{gjno zOP{=Jsepei^7AJ_cGycGM0P#2L27tNQ!xo^ZhdwAZ5KsJ2&@4C2WcHQz38d|a4C_7 zr?3kyQNx@#3p*iw?;;$V+(4dy`q*aMzSn}Nj+b5$sG*1Mk|w7D;@{4VKg3-gQhB_f6|0$ab<$P%J`9CEW_-bpaxfXMaJgu)> 
zJPNTN`iu?8wtB+&c8s(RBtPP*hW!pQIN5Io$J5A1IA-o)j#n{6UznjOAvg>Vt2s#8 z2*DooK3ph|_|RpA_3No1rzwEwmS7L5MbyK01X3bk++djo2Xq|ysgvx<|6>OPI>}?( zi6%Q`3y=f{&f(B&2uvRc9-WaS2ELN*h9|#VdDxlGAqm&=v4ypl*+t-U!bYXcN9Pni z$|ZzcKZGq#_?ip@GvUhg7y7B-^k$ZC{9<2!+15^C2oN`6+4@SHf8k*a34kd$D)mdb zhU33~7nuZM7YMQlMNG^R=v@dAxZ>$fz|RJ9b0F$JpMMf zANefnmCRptYFKB->eR@YiK`de3oQ0xj$&H>f?nXP5L|Z-Mmp>t+G>Da=%HeFW8)lt3^RZ#v$j$ev4-B=RG32v^wVyu z4MqvXzyIR*6kBt4L3O{OUh3e(eC;pJ0BpdB9nD8PW$`Ddj~9FqE}Qtt6(ldK&=de_ zCB2A|P`&xccC@%QD_O#JO;uI1n;?F(5NpR+Stf?`lAB- ze!2o{9Yg_zV}hrLOEp)pw(~u(+^P0Q375&A2q)m)0r81Bs*0fqy*f?<0sRD0bA-De z*hEQ~M6*`4vbW6?z`O&C`+S{gUnU~+sO~%g2|-pCLOr|hEDt78(R_=cuTMZp@EZ7U z$2F*Sp#G$U`>W940!3<+E%#D6eInjM<6DE*$>}__d`bnvwgn(&&}fv}8P5KwGh`>e zNL0-39=i@hH{+Pki>~>#$N^3AuJ@J`f;dr;CgZ1!w+CKv!ku?IBJ8grBrofdqF?Ew z@bm2dDt`OjQm{-ER0?m#yjnFa-qq!~GlzR}u9Nz|rjA%Mz=;OSxmztLG|VHZ1;h5+kqsv|tVU5S9%I7aeQx-O248y(#L<+@#O=Z5vmV(1$UD`VcJ@zPMaIDRc&z5{P`Tm9c4C?X(=4s-UKl3*K>O!3Zgv}Yw@GA`&{dLx^ zuyj6hEy~%L1yT}-r06=dp*nvH?Ub+WCzj5htGG3p-dS6Xp(I?{lSib#7`@T z0(NQlPjocJ{rM~q?0hpgjay--T=4c!mPx2P0u2s8*oB#kvXpBw%-jo92U0-Ggty5Q z5K)!?%`$n{xUx_p`!IN_A^>z)I8X~G@issAZRI-r8LoNz!_ABs8VNqM0Z-@H2QS+< z{x(WtsWM=cq%?etD|Ht{Uz+@34;vu)p#6eS%!lTwxdM#*ikACZWU6r~F`UE1o(Yj< zK47`cB!ssz_1jt)1w?6nTWawzYTZyGXw zhn@3r+lzp71X@l48%}H#%YeICG#o_|Z9?|^42jfD!$yBD5~z>3Gfi;%r@N5!?(cZ% ztHZM&zC5`2k0B&5g<+$nTBh0I4P3xv@e1Efsb)X^(rGz%`0xOJPHJ#S0lmP&+QWe& znlNzu%p_nrPMCJA8pPEcu7RJc1iJ?KqBQ#%%cTy}AdvRW*I(d}6o{3jRwUjs==mKR zg{R#-G4VmtUnf#vPq)F^hHM>tTh2tcYcVuzTECBZC;1l3?}nN;sFvlDf$T1DMj)GR z@JI$?Ng=bdFpG;Ng%;)z7jvw{VG-&7E-`f`tU;QtC7I%x3z;D!bIJwdohWX`zlbCEdXU&tXvm1ms zj{*FBIIj+>3`T+zDSRtn-vbDt?+RKjO+7Oa?>|RE2T&FucB4tjDMUI>9(re-I{jFd znsN_IHGDXc2BpAYMh!kzKZ98#@7>Pq=AQ}D|9DJLBjKv<$3i(bVpoRIM;Rlbl`{fY zuYRM#&ZhlEQogMqx*aj!-yKY0r(vMolVo*Y(TTvlZ$?3PJ6GCc6J~1Znn>ECUlj_l7lrqwbcK zf55Czg6$0zae&iT?m|7jvnLEqYVj)b>JGz}5fuCNe*_NHI8ZWxjQR(o%FeyyKe4|w zC@qt3Am#RNCapU8;Qv8rs1$f=pD9=KI2ijEffHY%NDm@}!K%mZZ~fKTF>!X8PrT0K 
z=Mzm7*|}|Ks9knUkh&M7eSF@{XHDOFY12{Q)OH*-fYv%!Qh>McK_*gL%*E(I6|Z#Q zSF5Ggk68z90WtlZa^qPF$dQE}zTJt0Y`;?)En2iX!K5-?szlSl>?QgW)kzLDxj#7h zgtG2UWt`lp(;jL>#|d95WzE`h)GnLLyZ*8#*|XCA!TAqI8c+B9%npqPw|^e;2k9Z= z212e4o4bMi%woLiqX(~jHi{iC{lC)AJF3ZSYvVzNVH6Q{6r>pyfl*2jL?o0*QJR2_ zqJ&~WlqxmUL z_5kMR)&lQOUOJ2YO!u>)RoLSiZyv|5idWnZzzg;_@gvPRg8%;(Kk|2(T;^(6)B*^> zz&!7xvi(%A0f1 z$u<^(ue9+zNAOn)hoyZzPMc{?yl~@vZ&x<;h}0qp~7eiwgel%N>0!N^=U zbqAHg3f{J^pnOkyX*5ZZSSg(vG3YTj2=`%b@c@>$`+0v$maeE+Ij#Jq-!~JUDWR2k z@J~V6d=QlVmo44U8hy{Tzw~Fe`D@`FHztY6`hcIOI#O;DoHLeie8E^6p=5MjvoF}i zCmduWDr3}>X1zfu7ETgTvs@|2o|JNR_x`6I-}VDO6(-grx9!wz(*TNzi3NJhI3&&N z^MQLB@ReJb-gxV!BcYfR@^Cf^u!@4S$LKfyNYwah;?KtK-!Soy8`)RENrw0S*2YhJ z6QBA(=V|urBVf#~uzAsA%=7@taf#*WVdEEfL1O_|U=x`4l)Ufe2DFPq0Rq_rR>W(k zjvMN6*k&bp$E31_`>h++cZ=ffc50reP2;P;7k`S6`EO1}YjiIgXn6|+WZG$xB{yHf zL%chS63Ew6@sy=RI2Lmg6w~|u(Nx_+^?d|97`mE0fAuilCln}qk(eGv7mEAgo9nM> zcP7Lm}oWF@U$|QB!iw z7+5zJCf6Z-33W9e2u3Y03_cx*#9)eZu)2bZ)%Fj7Z=RUs>}+s%1FozZ&a)RTkn*bf}J{<1wYy*dNci{GmqCSH7 z#&WWWw1Ys70>(^sbG2u491vd`HyeQ8e12{rllDgHxE_&<`Cn1*Qd{a85} zO!@`?JS1toVcZnWiG>AilhS{EYJ(Uk)v_&PYWgyej>QnhjJ`*ag-tfe8&>WO*x22! 
z>l?OcW+VtCrM=e)%xVb_fy89ks;t^>w*6XHlyRGFzdu=`&9L(`RkI;s+VFtfPyP3W zg9-mw`Q>y#lj9qSPtVH zho1m6!elInd?w}ksw?pbZOX8a9ruD3h%y9{X2YutQ)-9bmK{BX$;gD-czFwXHrN#hcDS>D!65@de1skV)!O| zKGw9GeCe~y3%C(WOc>bmYnP7s7ed|yW@{ZoPtW+_`aOPKnNLL+;@XRxMjy zZ1}1{1)gD;;oE{@Ymn(YAE~;Z!)tZv3rzj2@r8N!nRK(_fddzPnpI~K9noAq#nMj5 z@ka84?XyBndgi&3SE|EO>TDpEm3JaiM}S4{ zL|r{sAU0s0EBmVlg!P5qp4ZnfkNs%}tl(dU2KvVrXc9W!I*U=%Qf37VD(S0hf8%kp z(h_{-fquOGO|kvLxBlrjF5HihlSf%4raXCdh!#S91oo7n$eq~tAke!RrV+pf#<*0R z&FWyHSmKduVf&eK921K$oPO1;tqG{T|Ktw274LYAp#<%GN>YVLDMui=_a_3x>o3{~ zRk#4po7x|bM0)5h)us$trOr^@2NjRl*spdp`j7dE)#twf)VBoi>(7M+O}p@NfM=eS z+2X;b0b6dTFNay$ya_YiX&5{2)P=%2K&7UN9FdJN{Ml12Y1wJB>&|f8&NVTG)d#Eg z%A-q*L+f7{ab}B?M{zzGqfbd%n4>({+ho)Q{)%?d+is6%L~F}|&EpY!z+svM1W+eR z?$!3YPKO3ve)4B8nvQiqGu3y?_1i6^{X%OxL2k^~5sC$({@};9%ClJu&uszm=`SF+ z0~cf;FIrwn+0koDS8=M}yhR%3vqG%Dnm2B_L{1!DG-Nj^PcQoqJ*ZDd$F7N0{R$|g zy?{b$NWV;Ni!KwB&kJ{a1pCpWU$P?9@z|-g_Nv&+;5?}ZP2;-Ce%q(79=UAq#>eoe zfJha%L-{p3cYJvuWXV+3a82zuh{SPRH!JXj>JWieYi*U+N-&`~w^wHs0>x(ylpptbn7IAZ)S55q2J z;odr$f$l9;-;{6D@Kl{-R<}B3p!+y=b0Xq^T6A|zdc&*9JR5AIq2oOF04FxtZ~_RRXVZ!tiHt|8pvQ>J`Yy?A<@LdI;p_=YFAd z=76@pkcMu@RAIaFsE`Z@8gJ}Q0ol4v;#XdqDQNrk^7uNnMT2uv<=I4f!^4xXuP9hH z=4ZUoo1eWq^Sbf8x%ki@CA5RgB2Rr|SEidL=UT3cV;C9EYUL@+BmY!rFCgaq7H-=h z<^h-!lWo>|Y2DRYwULzBR7JQ-WBr=e8`ivHbF>RI9i+qme4mumVV+hhrtDimlCw(l zn|3xRS;1@u0eQ+FTqw;2aJctNS>y40lV322dBC-Ca+{i1_@G{G1pHI|=ihnF1~dW=9> zVgnn2=;#RAH!4>g?F>5Q^M)tcFuhQQBR<(r%ABNOca1n-z)^KnhKbmx%(9A9oCwwF zscJUo{uq?iDtV>mc9;6{iA(0nmP&TOCdTxanfH8@>p)R=;ffigJjTRAp@z}QtWLAI zd?p`Ga!Q}jpk*n9J^e)?BV=$Y^0>dW&O&L4o5Z>1|6@l;E01f^W(_NANEesu1tteVh3^&eea(5e9d~XeL(7O)Kf&Spa_ zdM*M%^V`j}V-g`ksCOhyz(naUiE&s7D1Z@ZwB}bq!N*0a!b_ z0n_z)Wh-yP>zFPdxs>;q&^Te&{P1QbvM%Q34P;!2gQjYfxkoK%Z?@Gv9W<{HH^N-@ z)NAW2v5P*AubQR`d6UyyC4*Z=MN)Df?nJM54M)ta?z&p`l9BCKuqx*_aTvuFrgQLx zt@7dOupsDI;GmXqX_mGp+Ns$&LS$_8424A+au+%FINbKrRdjyMc$i<3KR`MIUcJRc z>Lb^DgW4KHKM?3nD(W#2R0m4?m1otVK`apa2Iab%N`=xxmrrW7{5>UC%P)U zj*7>+C7nk42*ubn;rou0N##n2v7l*gC(8DNC<#os&~t 
z43QWYnClp-+hO6DySp}4ZyGmy+~cgdx8s0Q)$t4F3o{>^4dt30iAi9*!5-afDzYmw z{ZbBM&$W)E-=gu4bu>4#9keawA3DB$+CG?J;1zt5oOg$HMMaReOPGgtEB%+Yo7^3t z>n0B;W=Bg8l#b|%<^2rBe6Ac_``Ar5K|HN+Mz9>2)1;#XN@n4~ns#lkQVUdwa?Ij;6QOl{ufzk1aFArD~F|#Qi zaxjOcK!o5Ry(b`Ru)dc*(mSd7!<%t?&-O)z_a)rbEjaUxlKN3B|X8DB)<6vj|)BUia-A_XYbS=N(t(5_d6`?@BUNW^LrP& z!>QPSKop@ZLt8B|fK@R*4oRBso4DkfCk*|Gk#exrHRM_UD=6BLgMg8JhC7mRHTvMR zb1>Y!OyktKyAy(K39*qnJjEg}Pu=CU=i?L&yIWV!*s9|tEou5@GMljQ=h$rnnkQ+6zXmDrNWf*}=ykqmI#sw#ZBYJizcj==GNnr9D& zI$2Jwb)wNm?2tP$>ApZed%k1KDX(u6P+k>8%3s{?ex0%RHvU5Db@_IlaoVav4;+;| z9CCz3(4FPt1-f%Yy!n====DSB{-`)}zxb!?4j)1pqee8naLaUG3k@_w7G_!fc%qtA zQN3Vew(w9+KfpQ!4Cj))$;FUdEyV~H>gJRdGD6r>C*q=xR!ogoE{&$rEmQRR2_?C{ z$MriIW4^2t=eG9v!++`mbGv+O(%d;Jiielugo+A{?x&sIYJVR4apB@Sg|tM^ab;1` z=*RmKiD$3m&t+7a)=!(#YGEHV`G@)*?RmauUaj1h=P}ojzMYi0`>^ffjlD47H$EYd zp%nGbBs*HH4Dabso-Ol&K| zkR_2|0#QC~Z3y8wmC2l!l`Ic6L(^*R^=+e@G{qk|@`>?2NKF?isqdVC`ju|IUi^I3 zs(rB|LawqfjR+%mB>u7wqi@$OIyj!!%wIu{r&U{pqJKYrswxesNKt8T!q0e(-P<8( z4^_m`;_s0A6kLYA{PhOdLwLsr7I=1>LbosU7$m zk zBt4W1Mk?`v?A~ws zKQ9gs2aNAcCVl9EU)bwF-7c`A-i0t67l3P+DW);H#bCMgd27^?y{iIZl`>Ag0aodN10?BWwKUU-?F_;%y@mW8)o z8Xp=iG8W{>p@K+_n;tdPp?yx7yUw3CT|{oF4elC=<426z6P2aM@>`YptPmKF@n^zkh||wGeb#1+$>r1?tG*UO z#cb-ijP@2VedT`fuQEO! zGS$RL-LiUO6A-HWGK7jn%tHu_9I$sr^d{KG*xlYSatb)8&5%=T{wpAF@BY^#74phK cV2P3X&QUaJ Date: Thu, 18 Jun 2020 15:46:33 +0100 Subject: [PATCH 068/129] Center image --- docs/output.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 376d30b9..dd811b06 100644 --- a/docs/output.md +++ b/docs/output.md @@ -212,10 +212,10 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- ![MultiQC - mosdepth coverage plot](images/mqc_mosdepth_plot.png) -![R - Sample per-amplicon coverage plot](images/r_amplicon_barplot.png) - ![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) +![R - Sample per-amplicon coverage plot](images/r_amplicon_barplot.png) +
    Output files From 58c93110effbcbdcf39c44fb120fb22199d83d4d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 15:54:16 +0100 Subject: [PATCH 069/129] Center image --- docs/output.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/output.md b/docs/output.md index dd811b06..8df84e22 100644 --- a/docs/output.md +++ b/docs/output.md @@ -210,11 +210,11 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. -![MultiQC - mosdepth coverage plot](images/mqc_mosdepth_plot.png) - ![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) -![R - Sample per-amplicon coverage plot](images/r_amplicon_barplot.png) +

    + R - Sample per-amplicon coverage plot +

    Output files From a6367d05841dc786cf741f12bc99fc2fa21d7b11 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 16:08:38 +0100 Subject: [PATCH 070/129] Reorder sections --- docs/output.md | 72 +++++++++++++++++++++++++------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/docs/output.md b/docs/output.md index 8df84e22..c03db5b3 100644 --- a/docs/output.md +++ b/docs/output.md @@ -67,8 +67,6 @@ If multiple libraries/runs have been provided for the same sample in the input s [FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). -![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) -
    Output files @@ -81,12 +79,12 @@ If multiple libraries/runs have been provided for the same sample in the input s
    +![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) + ### fastp [fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. -![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) -
    Output files @@ -106,6 +104,8 @@ If multiple libraries/runs have been provided for the same sample in the input s
    +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + ## Variant calling A file called `summary_variants_metrics_mqc.tsv` containing a selection of read and variant calling metrics will be saved in the `variants/` results directory. The same metrics have also been added to the top of the MultiQC report. @@ -114,8 +114,6 @@ A file called `summary_variants_metrics_mqc.tsv` containing a selection of read [Bowtie 2](http://bio-bwa.sourceforge.net/) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. -![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) -
    Output files @@ -126,12 +124,12 @@ A file called `summary_variants_metrics_mqc.tsv` containing a selection of read
    +![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) + ### SAMtools Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. -![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) -
    Output files @@ -145,12 +143,12 @@ Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourcef
    +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + ### iVar trim If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--amplicon_bed` to soft clip primer sequences from a coordinate sorted BAM file. -![MultiQC - iVar trim primer heatmap](images/mqc_ivar_trim_plot.png) -
    Output files @@ -166,12 +164,12 @@ If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pa
    +![MultiQC - iVar trim primer heatmap](images/mqc_ivar_trim_plot.png) + ### picard MarkDuplicates Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. By default, the pipeline uses picard MarkDuplicates to *mark* the duplicate reads identified amongst the alignments to allow you to guage the overall level of duplication in your samples. However, you can also choose to remove any reads identified as duplicates via the `--filter_dups` parameter. -![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) -
    Output files @@ -187,14 +185,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) + ### picard CollectMultipleMetrics [picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. -![MultiQC - Picard whole genome coverage plot](images/mqc_picard_wgs_coverage_plot.png) - -![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) -
    Output files @@ -206,15 +202,13 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    -### mosdepth +![MultiQC - Picard whole genome coverage plot](images/mqc_picard_wgs_coverage_plot.png) -[mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. +![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) -![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) +### mosdepth -

    - R - Sample per-amplicon coverage plot -

    +[mosdepth](mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts.
    Output files @@ -251,6 +245,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

    + R - Sample per-amplicon coverage plot +

    + ### VarScan 2, BCFTools, BEDTools [VarScan 2](http://dkoboldt.github.io/varscan/) is a platform-independent software tool to detect variants in NGS data. In this pipeline, VarScan 2 is used in conjunction with SAMtools in order to call both high and low frequency variants. @@ -259,8 +259,6 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- [BEDTools](https://bedtools.readthedocs.io/en/latest/) is a swiss-army knife of tools for a wide-range of genomics analysis tasks. In this pipeline we use `bedtools genomecov` to compute the per-base mapped read coverage in bedGraph format, and `bedtools maskfasta` to mask sequences in a Fasta file based on intervals defined in a feature file. This may be useful for creating your own masked genome file based on custom annotations or for masking all but your target regions when aligning sequence data from a targeted capture experiment. -![MultiQC - VarScan 2 variants called plot](images/mqc_varscan2_plot.png) -
    Output files @@ -285,12 +283,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![MultiQC - VarScan 2 variants called plot](images/mqc_varscan2_plot.png) + ### iVar variants and iVar consensus [iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants and for consensus sequence generation. -![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) -
    Output files @@ -312,12 +310,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) + ### BCFTools and BEDTools [BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. The functionality to call variants with BCFTools in this pipeline was inspired by work carried out by [Conor Walker](https://github.com/conorwalker/covid19/blob/3cb26ec399417bedb7e60487415c78a405f517d6/scripts/call_variants.sh). In contrast to VarScan 2 and iVar, the original variant calls obtained by BCFTools are not filtered further by a higher allele frequency. It seems that the default calls obtained by BCFTools appear to be comparable with the high frequency variants generated by VarScan 2 and iVar. -![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) -
    Output files @@ -332,14 +330,14 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + ### SnpEff and SnpSift [SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). [SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. -![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) -
    Output files @@ -355,6 +353,8 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
    +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + ### QUAST [QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. @@ -377,8 +377,6 @@ A file called `summary_assembly_metrics_mqc.tsv` containing a selection of read In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the *de novo* assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. -![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) -
    Output files @@ -395,14 +393,14 @@ In the variant calling branch of the pipeline we are using [iVar trim](#ivar-tri
    +![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) + ### Kraken 2 [Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. We used a Kraken 2 database in this workflow to filter out reads specific to the host genome. The remainder of the reads are then passed to numerous *de novo* assembly algorithms in order to reconstruct the viral genome. -![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) -
    Output files @@ -415,6 +413,8 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the
    +![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) + ### SPAdes [SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. @@ -538,8 +538,6 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the [QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the *de novo* assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. -![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) -
    Output files @@ -550,6 +548,8 @@ We used a Kraken 2 database in this workflow to filter out reads specific to the
    +![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) + ### Minimap2, seqwish, vg [Minimap2](https://github.com/lh3/minimap2) is a versatile sequence alignment program that aligns DNA or mRNA sequences against a large reference database. Minimap2 was used to generate all-versus-all alignments between scaffold assembly contigs and the reference genome. From c2e17a11a31b3ae81bc3bf57d3f2121e7343c08b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 16:27:00 +0100 Subject: [PATCH 071/129] Update example MultiQC report --- docs/html/multiqc_report.html | 222 ++++++++++++++++++++++++++++------ 1 file changed, 187 insertions(+), 35 deletions(-) diff --git a/docs/html/multiqc_report.html b/docs/html/multiqc_report.html index 97033fec..2d2c05b2 100644 --- a/docs/html/multiqc_report.html +++ b/docs/html/multiqc_report.html @@ -23,7 +23,7 @@ MultiQC Report - + @@ -5489,6 +5489,28 @@

    +
  • + VARIANTS: mosdepth + +
  • + + +
  • VARIANTS: VarScan 2
      @@ -6425,12 +6447,12 @@

      JavaScript Disabled

      Report - generated on 2020-05-31, 21:33 + generated on 2020-06-18, 13:38 based on data in: - nf-core/viralrecon/work/fd/3c8e92b3222a535ed19d3834f6d258

      + nfcore/viralrecon/test_full/work/c1/84674b294e014dce8501b3e9ec48ec

      @@ -6827,7 +6849,7 @@

      De novo assembly metrics

      -
      Sample# Input reads# Trimmed reads (Cutadapt)% Non-host reads (Kraken 2)# Contigs (SPAdes)Largest contig (SPAdes)% Genome fraction (SPAdes)N50 (SPAdes)# Contigs (metaSPAdes)Largest contig (metaSPAdes)% Genome fraction (metaSPAdes)N50 (metaSPAdes)# Contigs (Unicycler)Largest contig (Unicycler)% Genome fraction (Unicycler)N50 (Unicycler)# Contigs (minia)Largest contig (minia)% Genome fraction (minia)N50 (minia)# SNPs (SPAdes)# INDELs (SPAdes)# SNPs (metaSPAdes)# INDELs (metaSPAdes)# SNPs (Unicycler)# INDELs (Unicycler)# SNPs (minia)# INDELs (minia)# Missense variants (SPAdes)# Missense variants (metaSPAdes)# Missense variants (Unicycler)# Missense variants (minia)
      sample1
      2755026
      2384570
      100
      3576
      5513
      40
      774
      16
      29962
      100
      29962
      166
      5386
      38
      916
      29
      16188
      99
      15793
      435
      174
      6
      0
      3
      0
      8
      0
      344
      2
      1
      4
      sample2
      2139958
      1913910
      99
      800
      5513
      86
      1070
      17
      29919
      100
      29919
      30
      2400
      83
      1478
      29
      18084
      99
      16188
      88
      26
      7
      0
      7
      1
      6
      0
      64
      5
      5
      5
      + Sample# Input reads# Trimmed reads (Cutadapt)% Non-host reads (Kraken 2)# Contigs (SPAdes)Largest contig (SPAdes)% Genome fraction (SPAdes)N50 (SPAdes)# Contigs (metaSPAdes)Largest contig (metaSPAdes)% Genome fraction (metaSPAdes)N50 (metaSPAdes)# Contigs (Unicycler)Largest contig (Unicycler)% Genome fraction (Unicycler)N50 (Unicycler)# Contigs (minia)Largest contig (minia)% Genome fraction (minia)N50 (minia)# SNPs (SPAdes)# INDELs (SPAdes)# SNPs (metaSPAdes)# INDELs (metaSPAdes)# SNPs (Unicycler)# INDELs (Unicycler)# SNPs (minia)# INDELs (minia)# Missense variants (SPAdes)# Missense variants (metaSPAdes)# Missense variants (Unicycler)# Missense variants (minia)sample1
      2755026
      2384570
      100
      3576
      5513
      40
      774
      16
      29962
      100
      29962
      166
      5386
      38
      916
      29
      16188
      99
      15793
      435
      174
      6
      0
      0
      0
      8
      0
      344
      2
      NA
      4
      sample2
      2139958
      1913910
      99
      800
      5513
      86
      1070
      17
      29919
      100
      29919
      30
      2400
      83
      1478
      29
      18084
      99
      16188
      88
      26
      7
      0
      7
      1
      6
      0
      64
      5
      5
      5
      @@ -12487,12 +12642,7 @@

      the mRNA.

      -
      - - -
      -
      loading..
      -
      +

      Error - was not able to plot data.


      @@ -12916,11 +13066,11 @@

      - - - + + +
      -
      loading..
      +
      loading..

      @@ -12991,10 +13141,10 @@

      - - + +
      -
      loading..
      +
      loading..

      @@ -13091,15 +13241,15 @@

      nf-core/viralrecon Software Versions

      -
      nf-core/viralrecon
      v1.0dev
      +
      nf-core/viralrecon
      v1.1.0dev
      Nextflow
      v20.01.0
      parallel-fastq-dump
      v0.6.6
      FastQC
      v0.11.9
      fastp
      v0.20.1
      -
      Bowtie 2
      v2.3.5.1
      +
      Bowtie 2
      N/A
      Samtools
      v1.9
      BEDTools
      v2.29.2
      -
      Picard
      v2.22.8
      +
      Picard
      v2.23.0
      iVar
      v1.2.2
      VarScan 2
      v2.4.4
      SnpEff
      v4.5covid19
      @@ -13109,7 +13259,7 @@

      nf-core/viralrecon Software Versions

      Kraken2
      v2.0.9-beta
      SPAdes
      v3.14.0
      Unicycler
      v0.4.7
      -
      minia
      v3.2.3
      +
      minia
      v3.2.4
      Minimap2
      v2.17-r941
      vg
      v1.24.0
      BLAST
      v2.9.0+
      @@ -13143,11 +13293,13 @@

      nf-core/viralrecon Workflow Summary

      -
      Run Name
      fervent_majorana
      +
      Run Name
      elated_kilby
      Samplesheet
      https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_amplicon.csv
      Protocol
      amplicon
      Amplicon Fasta File
      https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/amplicon/nCoV-2019.artic.V1.primer.fasta
      Amplicon BED File
      https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/amplicon/nCoV-2019.artic.V1.bed
      +
      Amplicon Left Suffix
      _LEFT
      +
      Amplicon Right Suffix
      _RIGHT
      Viral Genome
      NC_045512.2
      Viral Fasta File
      https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz
      Viral GFF
      https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz
      @@ -13167,9 +13319,9 @@

      nf-core/viralrecon Workflow Summary

      Container
      singularity - nfcore-viralrecon-dev.img
      Output dir
      ./results
      Publish dir mode
      copy
      -
      Launch dir
      nf-core/viralrecon/test_full
      -
      Working dir
      nf-core/viralrecon/test_full/work
      -
      Script dir
      nf-core/viralrecon
      +
      Launch dir
      nfcore/viralrecon/test_full
      +
      Working dir
      nfcore/viralrecon/test_full/work
      +
      Script dir
      nfcore/viralrecon
      User
      patelh
      Config Profile
      test_full,crick
      Config Description
      Full test dataset to check pipeline function
      From 16d3ceb018dc3a973b131d17f992191a94b43728 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 18:38:59 +0100 Subject: [PATCH 072/129] Add another example command --- README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ac945803..97260f24 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,7 @@ ## Introduction -**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. - -The pipeline supports short-read Illumina sequencing data from both shotgun (e.g. sequencing directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based). +**nfcore/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. The pipeline supports short-read Illumina sequencing data from both shotgun (e.g. sequencing directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based). The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with Docker containers making installation trivial and results highly reproducible. Furthermore, automated continuous integration tests that run the pipeline on a full-sized dataset using AWS cloud ensure that the code is stable. @@ -69,8 +67,25 @@ Numerous QC and reporting steps are included in the pipeline in order to collate 4. Start running your own analysis! 
+ * Typical command for shotgun analysis: + + ```bash + nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --genome 'MN908947.3' \ + -profile + ``` + + * Typical command for amplicon analysis: + ```bash - nextflow run nf-core/viralrecon -profile --input samplesheet.csv --genome 'NC_045512.2' -profile docker + nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --genome 'MN908947.3' \ + --protocol amplicon \ + --amplicon_bed ./nCoV-2019.artic.V3.bed \ + --skip_assembly \ + -profile ``` See the [usage documentation](docs/usage.md) for all of the available options when running the pipeline. From 334dc90c9fda518b68e2f51125b88182f8c5aee0 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 18:39:25 +0100 Subject: [PATCH 073/129] Update reference --- docs/usage.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 7fc6c8ed..2d255f4e 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -104,7 +104,7 @@ NXF_OPTS='-Xms1g -Xmx4g' The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'NC_045512.2' -profile docker +nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'MN908947.3' -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -222,10 +222,10 @@ Specifies the type of protocol used for sequencing i.e. 'metagenomic' or 'amplic If the `--protocol amplicon` parameter is provided then iVar is used to trim amplicon primer sequences after read alignment and before variant calling. iVar uses the primer positions relative to the viral genome supplied in `--amplicon_bed` to soft clip primer sequences from a coordinate sorted BAM file. 
The file must be in [BED](https://genome.ucsc.edu/FAQ/FAQformat.html#format1) format as highlighted below: ```bash -NC_045512.2 30 54 nCoV-2019_1_LEFT 60 - -NC_045512.2 385 410 nCoV-2019_1_RIGHT 60 + -NC_045512.2 320 342 nCoV-2019_2_LEFT 60 - -NC_045512.2 704 726 nCoV-2019_2_RIGHT 60 + +MN908947.3 30 54 nCoV-2019_1_LEFT 60 - +MN908947.3 385 410 nCoV-2019_1_RIGHT 60 + +MN908947.3 320 342 nCoV-2019_2_LEFT 60 - +MN908947.3 704 726 nCoV-2019_2_RIGHT 60 + ``` ### `--amplicon_fasta` From c2e594ee078ee5ee209e7d176bfdf6de21b304ec Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 18:39:33 +0100 Subject: [PATCH 074/129] Bug fixes --- main.nf | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) diff --git a/main.nf b/main.nf index faaaa12b..81c4f942 100644 --- a/main.nf +++ b/main.nf @@ -17,7 +17,7 @@ def helpMessage() { The typical command for running the pipeline is as follows: - nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'NC_045512.2' -profile docker + nextflow run nf-core/viralrecon --input samplesheet.csv --genome 'MN908947.3' -profile docker Mandatory arguments --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) @@ -233,19 +233,11 @@ if (params.gff) summary['Viral GFF'] = params.gff if (params.save_reference) summary['Save Genome Indices'] = 'Yes' if (params.save_sra_fastq) summary['Save SRA FastQ'] = params.save_sra_fastq if (params.skip_sra) summary['Skip SRA Download'] = params.skip_sra -if (!params.skip_kraken2) { - if (params.kraken2_db) summary['Host Kraken2 DB'] = params.kraken2_db - if (params.kraken2_db_name) summary['Host Kraken2 Name'] = params.kraken2_db_name - if (params.kraken2_use_ftp) summary['Kraken2 Use FTP'] = params.kraken2_use_ftp - if (params.save_kraken2_fastq) summary['Save Kraken2 FastQ'] = params.save_kraken2_fastq -} else { - summary['Skip Kraken2'] = 'Yes' -} if (!params.skip_adapter_trimming) { - if 
(params.cut_mean_quality) summary['Cut Mean Quality'] = params.cut_mean_quality - if (params.qualified_quality_phred) summary['Qualified Phred'] = params.qualified_quality_phred - if (params.unqualified_percent_limit) summary['Unqualified Perc Limit'] = params.unqualified_percent_limit - if (params.min_trim_length) summary['Min Trim Length'] = params.min_trim_length + if (params.cut_mean_quality) summary['Fastp Mean Qual'] = params.cut_mean_quality + if (params.qualified_quality_phred) summary['Fastp Qual Phred'] = params.qualified_quality_phred + if (params.unqualified_percent_limit) summary['Fastp Unqual % Limit'] = params.unqualified_percent_limit + if (params.min_trim_length) summary['Fastp Min Trim Length'] = params.min_trim_length } else { summary['Skip Adapter Trimming'] = 'Yes' } @@ -269,6 +261,14 @@ if (!params.skip_variants) { } else { summary['Skip Variant Calling'] = 'Yes' } +if (!params.skip_kraken2 && !params.skip_assembly) { + if (params.kraken2_db) summary['Host Kraken2 DB'] = params.kraken2_db + if (params.kraken2_db_name) summary['Host Kraken2 Name'] = params.kraken2_db_name + if (params.kraken2_use_ftp) summary['Kraken2 Use FTP'] = params.kraken2_use_ftp + if (params.save_kraken2_fastq) summary['Save Kraken2 FastQ'] = params.save_kraken2_fastq +} else { + summary['Skip Kraken2'] = 'Yes' +} if (!params.skip_assembly) { summary['Assembly Tools'] = params.assemblers summary['Minia Kmer Size'] = params.minia_kmer @@ -393,7 +393,7 @@ if (params.gff) { /* * PREPROCESSING: Uncompress Kraken2 database */ -if (!params.skip_kraken2 && params.kraken2_db) { +if (!params.skip_kraken2 && params.kraken2_db && !params.skip_assembly) { file(params.kraken2_db, checkIfExists: true) if (params.kraken2_db.endsWith('.tar.gz')) { process UNTAR_KRAKEN2_DB { @@ -1862,7 +1862,7 @@ if (!isOffline()) { /* * STEP 6.1: Amplicon trimming with Cutadapt */ -if (params.protocol == 'amplicon' && !params.skip_amplicon_trimming) { +if (params.protocol == 'amplicon' && 
!params.skip_assembly && !params.skip_amplicon_trimming) { process CUTADAPT { tag "$sample" label 'process_medium' @@ -1874,9 +1874,6 @@ if (params.protocol == 'amplicon' && !params.skip_amplicon_trimming) { else params.save_trimmed ? filename : null } - when: - !params.skip_assembly - input: tuple val(sample), val(single_end), path(reads) from ch_fastp_cutadapt path amplicons from ch_amplicon_fasta @@ -1915,7 +1912,7 @@ if (params.protocol == 'amplicon' && !params.skip_amplicon_trimming) { /* * STEP 6.2: Filter reads with Kraken2 */ -if (!params.skip_kraken2) { +if (!params.skip_kraken2 && !params.skip_assembly) { process KRAKEN2 { tag "$db" label 'process_high' @@ -1925,9 +1922,6 @@ if (!params.skip_kraken2) { else params.save_kraken2_fastq ? filename : null } - when: - !params.skip_assembly - input: tuple val(sample), val(single_end), path(reads) from ch_fastp_kraken2 path db from ch_kraken2_db From 37131c6cedb346bf5780b18bf7e2a0877d3e2ab8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 18:43:26 +0100 Subject: [PATCH 075/129] Update README --- README.md | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 97260f24..e446f9bf 100644 --- a/README.md +++ b/README.md @@ -69,24 +69,24 @@ Numerous QC and reporting steps are included in the pipeline in order to collate * Typical command for shotgun analysis: - ```bash - nextflow run nf-core/viralrecon \ - --input samplesheet.csv \ - --genome 'MN908947.3' \ - -profile - ``` + ```bash + nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --genome 'MN908947.3' \ + -profile + ``` * Typical command for amplicon analysis: - ```bash - nextflow run nf-core/viralrecon \ - --input samplesheet.csv \ - --genome 'MN908947.3' \ - --protocol amplicon \ - --amplicon_bed ./nCoV-2019.artic.V3.bed \ - --skip_assembly \ - -profile - ``` + ```bash + nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --genome 'MN908947.3' \ + 
--protocol amplicon \ + --amplicon_bed ./nCoV-2019.artic.V3.bed \ + --skip_assembly \ + -profile + ``` See the [usage documentation](docs/usage.md) for all of the available options when running the pipeline. From 1758e1cf50e265229fec499ebfe829be6d7f2f20 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 18:49:56 +0100 Subject: [PATCH 076/129] Add Artem --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index e446f9bf..f46d5744 100644 --- a/README.md +++ b/README.md @@ -114,6 +114,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | +| [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | | [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | From 2f80eb6d5d4f1d7a3797275fab0b8dee1cbf3ab7 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 20:02:53 +0100 Subject: [PATCH 077/129] Add Anton --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f46d5744..c69901c1 100644 --- a/README.md +++ b/README.md @@ -114,7 +114,9 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | 
[Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | +| [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University](https://english.spbu.ru/) | | [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | +| Dmitry Meleshko | [Saint Petersburg State University](https://english.spbu.ru/) | | [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | From 3063bd9e09ff1956eea0b2acc2b5e077f391ff4f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 20:05:21 +0100 Subject: [PATCH 078/129] Add Dmitry GH link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c69901c1..444ca610 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | | [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University](https://english.spbu.ru/) | | [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | -| Dmitry Meleshko | [Saint Petersburg State University](https://english.spbu.ru/) | +| [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University](https://english.spbu.ru/) | | [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) 
| | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | From 43c9d78884774bfe5e419014c38cf1e8b31c6710 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Thu, 18 Jun 2020 20:06:59 +0100 Subject: [PATCH 079/129] Add country, doh --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 444ca610..cfbe97f4 100644 --- a/README.md +++ b/README.md @@ -114,9 +114,9 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | | [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | -| [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University](https://english.spbu.ru/) | +| [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | | [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | -| [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University](https://english.spbu.ru/) | +| [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | | [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | From 64ad23ec881cdd23c5e781119f12fd4d540568a2 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 08:27:45 +0100 Subject: [PATCH 080/129] 
Update CHANGELOG --- CHANGELOG.md | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5a782858..14f94c22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,9 +10,19 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: - * `--skip_mosdepth` to skip genome-wide and amplicon coverage plot generation from mosdepth output - * `--amplicon_left_suffix` to provide left primer suffix used in name field of `--amplicon_bed` - * `--amplicon_right_suffix` to provide right primer suffix used in name field of `--amplicon_bed` + * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output + * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` + * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` + * Unify parameter specification with COG-UK pipeline: + * `ivar trim` + * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer_reads` + * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + * `--ivar_trim_window_width` - width of sliding window + * `ivar variants` + * + * `ivar consensus` + * ### `Removed` From 3b700987280fa949dc123088b1c62a129dc14a6c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 08:27:53 +0100 Subject: [PATCH 081/129] Add ivar trim params --- docs/usage.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2d255f4e..2c670664 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,7 +37,10 @@ * 
[`--skip_kraken2`](#--skip_kraken2) * [Variant calling](#variant-calling) * [`--callers`](#-callers) - * [`--ivar_exclude_reads`](#--ivar_exclude_reads) + * [`--ivar_trim_noprimer_reads`](#--ivar_trim_noprimer_reads) + * [`--ivar_trim_min_len`](#--ivar_trim_min_len) + * [`--ivar_trim_min_qual`](#--ivar_trim_min_qual) + * [`--ivar_trim_window_width`](#--ivar_trim_window_width) * [`--filter_dups`](#--filter_dups) * [`--filter_unmapped`](#--filter_unmapped) * [`--min_base_qual`](#--min_base_qual) @@ -360,10 +363,22 @@ Skip Kraken 2 process for removing host classified reads (Default: false). Specify which variant calling algorithms you would like to use. Available options are `varscan2`, `ivar` and `bcftools` (Default: 'varscan2,ivar,bcftools'). -### `--ivar_exclude_reads` +### `--ivar_trim_noprimer_reads` This option unsets the `-e` parameter in `ivar trim` to discard reads without primers (Default: false). +### `--ivar_trim_min_len` + +Minimum length of read to retain after trimming (Default: 30). + +### `--ivar_trim_min_qual` + +Minimum quality threshold for sliding window to pass (Default: 20). + +### `--ivar_trim_window_width` + +Width of sliding window (Default: 4). + ### `--filter_dups` Remove duplicate reads from alignments as identified by picard MarkDuplicates (Default: false). Note that unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. 
From e7abecc9f264f50c48878dc7d7fed261f8e4bb71 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 08:28:00 +0100 Subject: [PATCH 082/129] Add ivar trim params --- nextflow.config | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index a55ab39c..444e8438 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,7 +40,10 @@ params { // Options: Variant calling callers = 'varscan2,ivar,bcftools' - ivar_exclude_reads = false + ivar_trim_noprimer_reads = false + ivar_trim_min_len = 30 + ivar_trim_min_qual = 20 + ivar_trim_window_width = 4 filter_dups = false filter_unmapped = false min_base_qual = 20 From 7095720f3de315a56ada8fe44b696e2f015382c7 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 08:28:03 +0100 Subject: [PATCH 083/129] Add ivar trim params --- main.nf | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index 81c4f942..9611f97c 100644 --- a/main.nf +++ b/main.nf @@ -57,7 +57,10 @@ def helpMessage() { Variant calling --callers [str] Specify which variant calling algorithms you would like to use (Default: 'varscan2,ivar,bcftools') - --ivar_exclude_reads [bool] Unset -e parameter for iVar trim. Reads with primers are included by default (Default: false) + --ivar_trim_noprimer_reads [bool] Unset -e parameter for iVar trim. 
Reads with primers are included by default (Default: false) + --ivar_trim_min_len [int] Minimum length of read to retain after trimming (Default: 30) + --ivar_trim_min_qual [int] Minimum quality threshold for sliding window to pass (Default: 20) + --ivar_trim_window_width [int] Width of sliding window (Default: 4) --filter_dups [bool] Remove duplicate reads from alignments as identified by picard MarkDuplicates (Default: false) --filter_unmapped [bool] Remove unmapped reads from alignments (Default: false) --min_base_qual [int] When performing variant calling skip bases with baseQ/BAQ smaller than this number (Default: 20) @@ -245,7 +248,10 @@ if (params.skip_amplicon_trimming) summary['Skip Amplicon Trimming'] = 'Yes' if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (!params.skip_variants) { summary['Variant Calling Tools'] = params.callers - if (params.ivar_exclude_reads) summary['iVar Trim Exclude'] = 'Yes' + if (params.ivar_trim_noprimer_reads) summary['iVar Trim Exclude'] = 'Yes' + summary['iVar Trim Min Len'] = params.ivar_trim_min_len + summary['iVar Trim Min Qual'] = params.ivar_trim_min_qual + summary['iVar Trim Window'] = params.ivar_trim_window_width if (params.filter_dups) summary['Remove Duplicate Reads'] = 'Yes' if (params.filter_unmapped) summary['Remove Unmapped Reads'] = 'Yes' summary['Min Base Quality'] = params.min_base_qual @@ -953,7 +959,7 @@ if (params.protocol != 'amplicon') { path "*.log" into ch_ivar_trim_log_mqc script: - exclude_reads = params.ivar_exclude_reads ? "" : "-e" + exclude_reads = params.ivar_trim_noprimer_reads ? 
"" : "-e" prefix = "${sample}.trim" """ samtools view -b -F 4 ${bam[0]} > ${sample}.mapped.bam @@ -961,8 +967,11 @@ if (params.protocol != 'amplicon') { ivar trim \\ -i ${sample}.mapped.bam \\ - $exclude_reads \\ -b $bed \\ + -m $params.ivar_trim_min_len \\ + -q $params.ivar_trim_min_qual \\ + -s $params.ivar_trim_window_width \\ + $exclude_reads \\ -p $prefix > ${prefix}.ivar.log samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $prefix ${prefix}.bam From e13cd6b6f643705c594552ed59248b4688b7453b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 09:17:11 +0100 Subject: [PATCH 084/129] Update param name --- bin/ivar_variants_to_vcf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 4860266f..5abe87b9 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -13,7 +13,7 @@ def parse_args(args=None): parser.add_argument('FILE_IN', help="Input tsv file.") parser.add_argument('FILE_OUT', help="Full path to output vcf file.") parser.add_argument('-po', '--pass_only', dest="PASS_ONLY", help="Only output variants that PASS all filters.",action='store_true') - parser.add_argument('-ma', '--min_allele_freq', type=float, dest="MIN_ALLELE_FREQ", default=0, help="Only output variants where allele frequency greater than this number (default: 0).") + parser.add_argument('-af', '--allele_freq_thresh', type=float, dest="ALLELE_FREQ_THRESH", default=0, help="Only output variants where allele frequency greater than this number (default: 0).") return parser.parse_args(args) @@ -96,7 +96,7 @@ def ivar_variants_to_vcf(FileIn,FileOut,passOnly=False,minAF=0): def main(args=None): args = parse_args(args) - ivar_variants_to_vcf(args.FILE_IN,args.FILE_OUT,args.PASS_ONLY,args.MIN_ALLELE_FREQ) + ivar_variants_to_vcf(args.FILE_IN,args.FILE_OUT,args.PASS_ONLY,args.ALLELE_FREQ_THRESH) if __name__ == '__main__': From 0edd1067655858b956dd03dee74dc467d26abf13 Mon Sep 17 
00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 09:17:44 +0100 Subject: [PATCH 085/129] Add --mpileup_depth param --- docs/usage.md | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/docs/usage.md b/docs/usage.md index 2c670664..9e5106f1 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,13 +37,15 @@ * [`--skip_kraken2`](#--skip_kraken2) * [Variant calling](#variant-calling) * [`--callers`](#-callers) - * [`--ivar_trim_noprimer_reads`](#--ivar_trim_noprimer_reads) + * [`--ivar_trim_noprimer`](#--ivar_trim_noprimer) * [`--ivar_trim_min_len`](#--ivar_trim_min_len) * [`--ivar_trim_min_qual`](#--ivar_trim_min_qual) * [`--ivar_trim_window_width`](#--ivar_trim_window_width) * [`--filter_dups`](#--filter_dups) * [`--filter_unmapped`](#--filter_unmapped) + * [`--mpileup_depth`](#--mpileup_depth) * [`--min_base_qual`](#--min_base_qual) + * [`--min_allele_freq`](#--min_allele_freq) * [`--max_allele_freq`](#--max_allele_freq) * [`--amplicon_left_suffix`](#--amplicon_left_suffix) * [`--amplicon_right_suffix`](#--amplicon_right_suffix) @@ -363,13 +365,13 @@ Skip Kraken 2 process for removing host classified reads (Default: false). Specify which variant calling algorithms you would like to use. Available options are `varscan2`, `ivar` and `bcftools` (Default: 'varscan2,ivar,bcftools'). -### `--ivar_trim_noprimer_reads` +### `--ivar_trim_noprimer` This option unsets the `-e` parameter in `ivar trim` to discard reads without primers (Default: false). ### `--ivar_trim_min_len` -Minimum length of read to retain after trimming (Default: 30). +Minimum length of read to retain after trimming (Default: 20). ### `--ivar_trim_min_qual` @@ -387,6 +389,10 @@ Remove duplicate reads from alignments as identified by picard MarkDuplicates (D Remove unmapped reads from alignments (Default: false). +### `--mpileup_depth` + +SAMTools mpileup max per-file depth (Default: 0). 
See [here](https://github.com/connor-lab/ncov2019-artic-nf/pull/51) for an explanation of the default value choice. + ### `--min_base_qual` When performing variant calling skip bases with baseQ/BAQ smaller than this number (Default: 20). @@ -395,9 +401,13 @@ When performing variant calling skip bases with baseQ/BAQ smaller than this numb When performing variant calling skip positions with an overall read depth smaller than this number (Default: 10). +### `--min_allele_freq` + +Minimum allele frequency threshold for calling variants (Default: 0.25). + ### `--max_allele_freq` -Maximum allele frequency threshold for filtering variant calls (Default: 0.8). +Maximum allele frequency threshold for filtering variant calls (Default: 0.75). ### `--amplicon_left_suffix` From 23c8fe4b55c7bc113b9bead698b26fea7d28784a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 09:17:54 +0100 Subject: [PATCH 086/129] Add --mpileup_depth param --- main.nf | 23 +++++++++++++---------- nextflow.config | 8 +++++--- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/main.nf b/main.nf index 9611f97c..6689d659 100644 --- a/main.nf +++ b/main.nf @@ -57,15 +57,17 @@ def helpMessage() { Variant calling --callers [str] Specify which variant calling algorithms you would like to use (Default: 'varscan2,ivar,bcftools') - --ivar_trim_noprimer_reads [bool] Unset -e parameter for iVar trim. Reads with primers are included by default (Default: false) - --ivar_trim_min_len [int] Minimum length of read to retain after trimming (Default: 30) + --ivar_trim_noprimer [bool] Unset -e parameter for iVar trim. 
Reads with primers are included by default (Default: false) + --ivar_trim_min_len [int] Minimum length of read to retain after trimming (Default: 20) --ivar_trim_min_qual [int] Minimum quality threshold for sliding window to pass (Default: 20) --ivar_trim_window_width [int] Width of sliding window (Default: 4) --filter_dups [bool] Remove duplicate reads from alignments as identified by picard MarkDuplicates (Default: false) --filter_unmapped [bool] Remove unmapped reads from alignments (Default: false) + --mpileup_depth [int] SAMTools mpileup max per-file depth (Default: 0) --min_base_qual [int] When performing variant calling skip bases with baseQ/BAQ smaller than this number (Default: 20) --min_coverage [int] When performing variant calling skip positions with an overall read depth smaller than this number (Default: 10) - --max_allele_freq [float] Maximum allele frequency threshold for filtering variant calls (Default: 0.8) + --min_allele_freq [float] Minimum allele frequency threshold for calling variants (Default: 0.25) + --max_allele_freq [float] Maximum allele frequency threshold for filtering variant calls (Default: 0.75) --amplicon_left_suffix [str] Suffix used in name field of --amplicon_bed to indicate left primer position (Default: '_LEFT') --amplicon_right_suffix [str] Suffix used in name field of --amplicon_bed to indicate right primer position (Default: '_RIGHT') --save_align_intermeds [bool] Save the intermediate BAM files from the alignment steps (Default: false) @@ -248,7 +250,7 @@ if (params.skip_amplicon_trimming) summary['Skip Amplicon Trimming'] = 'Yes' if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (!params.skip_variants) { summary['Variant Calling Tools'] = params.callers - if (params.ivar_trim_noprimer_reads) summary['iVar Trim Exclude'] = 'Yes' + if (params.ivar_trim_noprimer) summary['iVar Trim Exclude'] = 'Yes' summary['iVar Trim Min Len'] = params.ivar_trim_min_len summary['iVar Trim Min Qual'] = params.ivar_trim_min_qual 
summary['iVar Trim Window'] = params.ivar_trim_window_width @@ -256,6 +258,7 @@ if (!params.skip_variants) { if (params.filter_unmapped) summary['Remove Unmapped Reads'] = 'Yes' summary['Min Base Quality'] = params.min_base_qual summary['Min Read Depth'] = params.min_coverage + summary['Min Allele Freq'] = params.min_allele_freq summary['Max Allele Freq'] = params.max_allele_freq if (params.save_align_intermeds) summary['Save Align Intermeds'] = 'Yes' if (params.save_mpileup) summary['Save mpileup'] = 'Yes' @@ -959,7 +962,7 @@ if (params.protocol != 'amplicon') { path "*.log" into ch_ivar_trim_log_mqc script: - exclude_reads = params.ivar_trim_noprimer_reads ? "" : "-e" + exclude_reads = params.ivar_trim_noprimer ? "" : "-e" prefix = "${sample}.trim" """ samtools view -b -F 4 ${bam[0]} > ${sample}.mapped.bam @@ -1242,7 +1245,7 @@ process SAMTOOLS_MPILEUP { samtools mpileup \\ --count-orphans \\ --no-BAQ \\ - --max-depth 50000 \\ + --max-depth $params.mpileup_depth \\ --fasta-ref $fasta \\ --min-BQ $params.min_base_qual \\ --output ${prefix}.mpileup \\ @@ -1287,7 +1290,7 @@ process VARSCAN2 { --min-coverage $params.min_coverage \\ --min-reads2 5 \\ --min-avg-qual $params.min_base_qual \\ - --min-var-freq 0.03 \\ + --min-var-freq $params.min_allele_freq \\ --p-value 0.99 \\ --output-vcf 1 \\ --vcf-sample-list sample_name.list \\ @@ -1489,7 +1492,7 @@ process IVAR_VARIANTS { features = params.gff ? 
"-g $gff" : "" prefix = "${sample}.AF${params.max_allele_freq}" """ - cat $mpileup | ivar variants -q $params.min_base_qual -t 0.03 -m $params.min_coverage -r $fasta -p $sample $features + cat $mpileup | ivar variants -q $params.min_base_qual -t $params.min_allele_freq -m $params.min_coverage -r $fasta $features -p $sample ivar_variants_to_vcf.py ${sample}.tsv ${sample}.vcf > ${sample}.variant.counts.log bgzip -c ${sample}.vcf > ${sample}.vcf.gz @@ -1497,7 +1500,7 @@ process IVAR_VARIANTS { bcftools stats ${sample}.vcf.gz > ${sample}.bcftools_stats.txt cat $header ${sample}.variant.counts.log > ${sample}.variant.counts_mqc.tsv - ivar_variants_to_vcf.py ${sample}.tsv ${prefix}.vcf --pass_only --min_allele_freq $params.max_allele_freq > ${prefix}.variant.counts.log + ivar_variants_to_vcf.py ${sample}.tsv ${prefix}.vcf --pass_only --allele_freq_thresh $params.max_allele_freq > ${prefix}.variant.counts.log bgzip -c ${prefix}.vcf > ${prefix}.vcf.gz tabix -p vcf -f ${prefix}.vcf.gz bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt @@ -1668,7 +1671,7 @@ process BCFTOOLS_VARIANTS { bcftools mpileup \\ --count-orphans \\ --no-BAQ \\ - --max-depth 50000 \\ + --max-depth $params.mpileup_depth \\ --fasta-ref $fasta \\ --min-BQ $params.min_base_qual \\ --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR \\ diff --git a/nextflow.config b/nextflow.config index 444e8438..6bfe201a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,15 +40,17 @@ params { // Options: Variant calling callers = 'varscan2,ivar,bcftools' - ivar_trim_noprimer_reads = false - ivar_trim_min_len = 30 + ivar_trim_noprimer = false + ivar_trim_min_len = 20 ivar_trim_min_qual = 20 ivar_trim_window_width = 4 filter_dups = false filter_unmapped = false + mpileup_depth = 0 min_base_qual = 20 min_coverage = 10 - max_allele_freq = 0.8 + min_allele_freq = 0.25 + max_allele_freq = 0.75 amplicon_left_suffix = '_LEFT' amplicon_right_suffix = '_RIGHT' 
save_align_intermeds = false From 085572ae6d4e75167c89600b6163050dcf3f7def Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 09:17:59 +0100 Subject: [PATCH 087/129] Add --mpileup_depth param --- CHANGELOG.md | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 14f94c22..19779a91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,15 +14,12 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` * Unify parameter specification with COG-UK pipeline: - * `ivar trim` - * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer_reads` - * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming - * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass - * `--ivar_trim_window_width` - width of sliding window - * `ivar variants` - * - * `ivar consensus` - * + * `--min_allele_freq` - minimum allele frequency threshold for calling variants + * `--mpileup_depth` - SAMTools mpileup max per-file depth + * `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` + * `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + * `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + * `--ivar_trim_window_width` - width of sliding window ### `Removed` From 4461c408701db0ba78264092632e45c93f83ba4c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 09:23:21 +0100 Subject: [PATCH 088/129] Add param to summary --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 6689d659..2bd80c34 100644 --- a/main.nf +++ b/main.nf @@ -256,6 +256,7 @@ if (!params.skip_variants) { summary['iVar Trim Window'] = params.ivar_trim_window_width if (params.filter_dups) 
summary['Remove Duplicate Reads'] = 'Yes' if (params.filter_unmapped) summary['Remove Unmapped Reads'] = 'Yes' + summary['Mpileup Depth'] = params.mpileup_depth summary['Min Base Quality'] = params.min_base_qual summary['Min Read Depth'] = params.min_coverage summary['Min Allele Freq'] = params.min_allele_freq From 9e3336c03306980283e8cd3724cbfe4c19848e89 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 10:55:01 +0100 Subject: [PATCH 089/129] Bump versions --- .github/workflows/ci.yml | 8 ++++---- CHANGELOG.md | 2 +- Dockerfile | 4 ++-- environment.yml | 2 +- nextflow.config | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bec41d00..35f8ec7f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -41,7 +41,7 @@ jobs: if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.1.0 - name: Install Nextflow run: | @@ -81,7 +81,7 @@ jobs: if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.1.0 - name: Install Nextflow run: | @@ -121,7 +121,7 @@ jobs: if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.1.0 - name: Install Nextflow run: | @@ -161,7 +161,7 @@ jobs: if: ${{ !env.GIT_DIFF }} run: | docker pull nfcore/viralrecon:dev - docker tag nfcore/viralrecon:dev nfcore/viralrecon:dev + docker tag nfcore/viralrecon:dev nfcore/viralrecon:1.1.0 - name: Install Nextflow run: | diff --git a/CHANGELOG.md b/CHANGELOG.md index 19779a91..6e0b07e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project 
adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [Unpublished Version / DEV] +## [1.1.0] - 2020-06-22 ### `Added` diff --git a/Dockerfile b/Dockerfile index 4752f69c..4d994949 100644 --- a/Dockerfile +++ b/Dockerfile @@ -10,10 +10,10 @@ RUN conda env create -f /environment.yml && conda clean -a RUN apt-get install -y libgl1-mesa-glx && apt-get clean -y # Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-viralrecon-1.1.0dev/bin:$PATH +ENV PATH /opt/conda/envs/nf-core-viralrecon-1.1.0/bin:$PATH # Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-viralrecon-1.1.0dev > nf-core-viralrecon-1.1.0dev.yml +RUN conda env export --name nf-core-viralrecon-1.1.0 > nf-core-viralrecon-1.1.0.yml # Instruct R processes to use these empty files instead of clashing with a local version RUN touch .Rprofile diff --git a/environment.yml b/environment.yml index 29cc35ca..8edb5f53 100644 --- a/environment.yml +++ b/environment.yml @@ -1,6 +1,6 @@ # You can use this file to create a conda environment for this pipeline: # conda env create -f environment.yml -name: nf-core-viralrecon-1.1.0dev +name: nf-core-viralrecon-1.1.0 channels: - conda-forge - bioconda diff --git a/nextflow.config b/nextflow.config index 6bfe201a..8f24e167 100644 --- a/nextflow.config +++ b/nextflow.config @@ -104,7 +104,7 @@ params { // Container slug. Stable releases should specify release tag! 
// Developmental code should specify :dev -process.container = 'nfcore/viralrecon:dev' +process.container = 'nfcore/viralrecon:1.1.0' // Load base.config by default for all pipelines includeConfig 'conf/base.config' @@ -179,7 +179,7 @@ manifest { description = 'Assembly and intrahost/low-frequency variant calling for viral samples' mainScript = 'main.nf' nextflowVersion = '>=19.10.0' - version = '1.1.0dev' + version = '1.1.0' } // Function to ensure that resource requirements don't go beyond From 7ebef3c862529bad3b8a783f55112d6481b35e9f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 11:04:59 +0100 Subject: [PATCH 090/129] Update CHANGELOG --- CHANGELOG.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e0b07e9..46824d99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,8 +23,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### `Removed` -* Parameters: - * `--skip_qc` +* `--skip_qc` parameter ### `Dependencies` From 43bf6845127126e8fa44a4b6a48a139bd3a456d0 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 13:14:54 +0100 Subject: [PATCH 091/129] BCFTools isec implementation --- CHANGELOG.md | 1 + README.md | 1 + docs/output.md | 18 ++++++++++++++++++ main.nf | 47 +++++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 46824d99..35189663 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### `Added` * [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot +* [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output diff --git a/README.md b/README.md index cfbe97f4..7d708da0 100644 --- a/README.md +++ b/README.md @@ -31,6 +31,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool 7. Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) *||* [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) *||* [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) * Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) * Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + 8. Intersect variants across callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) 6. _De novo_ assembly 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); *amplicon data only*) 2. 
Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/)) diff --git a/docs/output.md b/docs/output.md index c03db5b3..b6d28fb8 100644 --- a/docs/output.md +++ b/docs/output.md @@ -23,6 +23,7 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d * [VarScan 2, BCFTools, BEDTools](#varscan-2-bcftools-bedtools) *||* [iVar variants and iVar consensus](#ivar-variants-and-ivar-consensus) *||* [BCFTools and BEDTools](#bcftools-and-bedtools) - Variant calling and consensus sequence generation * [SnpEff and SnpSift](#snpeff-and-snpsift) - Genetic variant annotation and functional effect prediction * [QUAST](#quast) - Consensus assessment report + * [BCFTools isec](#bcftools-isec) - Intersect variants across all callers * [De novo assembly](#de-novo-assembly) * [Cutadapt](#cutadapt) - Primer trimming for amplicon data * [Kraken 2](#kraken-2) - Removal of host reads @@ -369,6 +370,23 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-
  • +### BCFTools isec + +[BCFTools isec](http://samtools.github.io/bcftools/bcftools.html#isec) can be used to intersect the variant calls generated by the 3 different callers used in the pipeline. This permits a quick assessment of how consistently a particular variant is being called using different algorithms and to prioritise the investigation of the variants. + +
    + Output files + +* `variants/intersect//` + * `*.vcf.gz`: VCF file containing variants common to at least 2/3 callers. There will be one file for each caller - see `README.txt` for details. + * `*.vcf.gz.tbi`: Index for VCF file. + * `README.txt`: File containing command used and file name mappings. + * `sites.txt`: List of variants common to at least 2/3 callers in textual format. The last column indicates presence (1) or absence (0) amongst the 3 different callers. + +> **NB:** This process will only executed when all 3 variant callers are specified to run, as is by default i.e. `--callers varscan2,ivar,bcftools`. + +
    + ## De novo assembly A file called `summary_assembly_metrics_mqc.tsv` containing a selection of read and *de novo* assembly related metrics will be saved in the `assembly/` results directory. The same metrics have also been added to the top of the MultiQC report. diff --git a/main.nf b/main.nf index 2bd80c34..5de8ea2c 100644 --- a/main.nf +++ b/main.nf @@ -1186,7 +1186,6 @@ if (params.protocol == 'amplicon') { } process MOSDEPTH_AMPLICON_PLOT { - tag "$sample" label 'process_medium' publishDir "${params.outdir}/variants/bam/mosdepth/amplicon/plots", mode: params.publish_dir_mode @@ -1276,7 +1275,8 @@ process VARSCAN2 { output: tuple val(sample), val(single_end), path("${prefix}.vcf.gz*") into ch_varscan2_highfreq_consensus, - ch_varscan2_highfreq_snpeff + ch_varscan2_highfreq_snpeff, + ch_varscan2_highfreq_intersect tuple val(sample), val(single_end), path("${sample}.vcf.gz*") into ch_varscan2_lowfreq_snpeff path "${prefix}.bcftools_stats.txt" into ch_varscan2_bcftools_highfreq_mqc path "*.varscan2.log" into ch_varscan2_log_mqc @@ -1481,7 +1481,8 @@ process IVAR_VARIANTS { path gff from ch_gff output: - tuple val(sample), val(single_end), path("${prefix}.vcf.gz*") into ch_ivar_highfreq_snpeff + tuple val(sample), val(single_end), path("${prefix}.vcf.gz*") into ch_ivar_highfreq_snpeff, + ch_ivar_highfreq_intersect tuple val(sample), val(single_end), path("${sample}.vcf.gz*") into ch_ivar_lowfreq_snpeff path "${prefix}.bcftools_stats.txt" into ch_ivar_bcftools_highfreq_mqc path "${sample}.variant.counts_mqc.tsv" into ch_ivar_count_mqc @@ -1663,7 +1664,8 @@ process BCFTOOLS_VARIANTS { output: tuple val(sample), val(single_end), path("*.vcf.gz*") into ch_bcftools_variants_consensus, - ch_bcftools_variants_snpeff + ch_bcftools_variants_snpeff, + ch_bcftools_variants_intersect path "*.bcftools_stats.txt" into ch_bcftools_variants_mqc script: @@ -1801,6 +1803,43 @@ process BCFTOOLS_QUAST { """ } +//////////////////////////////////////////////////// +/* -- INTERSECT 
VARIANTS -- */ +//////////////////////////////////////////////////// + +/* + * STEP 5.8: Intersect variants with BCFTools + */ +if (!params.skip_variants && callers.size() > 2) { + + ch_varscan2_highfreq_intersect + .join(ch_ivar_highfreq_intersect, by: [0,1]) + .join(ch_bcftools_variants_intersect, by: [0,1]) + .set { ch_varscan2_highfreq_intersect } + + process BCFTOOLS_ISEC { + tag "$sample" + label 'process_medium' + label 'error_ignore' + publishDir "${params.outdir}/variants/intersect", mode: params.publish_dir_mode + + input: + tuple val(sample), val(single_end), path('varscan2/*'), path('ivar/*'), path('bcftools/*') from ch_varscan2_highfreq_intersect + + output: + path "$sample" + + script: + """ + bcftools isec \\ + --nfiles +2 \\ + --output-type z \\ + -p $sample \\ + */*.vcf.gz + """ + } +} + /////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// /* -- -- */ From 608872aaca667e76fcc534a2292a6241e38120f1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 13:29:05 +0100 Subject: [PATCH 092/129] Switch off strand filter --- docs/output.md | 2 +- main.nf | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index b6d28fb8..3b7bb809 100644 --- a/docs/output.md +++ b/docs/output.md @@ -383,7 +383,7 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `README.txt`: File containing command used and file name mappings. * `sites.txt`: List of variants common to at least 2/3 callers in textual format. The last column indicates presence (1) or absence (0) amongst the 3 different callers. -> **NB:** This process will only executed when all 3 variant callers are specified to run, as is by default i.e. `--callers varscan2,ivar,bcftools`. +> **NB:** This process will only be executed when all 3 variant callers are specified to run, as is by default i.e. 
`--callers varscan2,ivar,bcftools`.
    diff --git a/main.nf b/main.nf index 5de8ea2c..01d6e4e2 100644 --- a/main.nf +++ b/main.nf @@ -1294,6 +1294,7 @@ process VARSCAN2 { --min-var-freq $params.min_allele_freq \\ --p-value 0.99 \\ --output-vcf 1 \\ + --strand-filter 0 \\ --vcf-sample-list sample_name.list \\ --variants \\ 2> ${sample}.varscan2.log \\ @@ -1816,7 +1817,7 @@ if (!params.skip_variants && callers.size() > 2) { .join(ch_ivar_highfreq_intersect, by: [0,1]) .join(ch_bcftools_variants_intersect, by: [0,1]) .set { ch_varscan2_highfreq_intersect } - + process BCFTOOLS_ISEC { tag "$sample" label 'process_medium' From 1a48306a06bd54b6e73a25a8b21800c3c5398c5f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 13:51:44 +0100 Subject: [PATCH 093/129] Add varscan2_strand_filter parameter --- CHANGELOG.md | 1 + docs/usage.md | 5 +++++ main.nf | 7 +++++-- nextflow.config | 1 + 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 35189663..f29f209c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: + * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` * `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` diff --git a/docs/usage.md b/docs/usage.md index 9e5106f1..5b8ac889 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -47,6 +47,7 @@ * [`--min_base_qual`](#--min_base_qual) * [`--min_allele_freq`](#--min_allele_freq) * [`--max_allele_freq`](#--max_allele_freq) + * [`--varscan2_strand_filter`](#--varscan2_strand_filter) * [`--amplicon_left_suffix`](#--amplicon_left_suffix) * [`--amplicon_right_suffix`](#--amplicon_right_suffix) * [`--min_coverage`](#--min_coverage) @@ -409,6 +410,10 @@ Minimum allele frequency threshold for calling variants (Default: 0.25). Maximum allele frequency threshold for filtering variant calls (Default: 0.75). +### `--varscan2_strand_filter` + +Ignore Varscan 2 variants with >90% support on one strand (Default: true). Note: the strand filter will be switched off for amplicon data by default because this sort of bias may be expected. + ### `--amplicon_left_suffix` Suffix used in name field of `--amplicon_bed` to indicate left primer position (Default: '\_LEFT'). 
diff --git a/main.nf b/main.nf index 01d6e4e2..cc692c8c 100644 --- a/main.nf +++ b/main.nf @@ -68,6 +68,7 @@ def helpMessage() { --min_coverage [int] When performing variant calling skip positions with an overall read depth smaller than this number (Default: 10) --min_allele_freq [float] Minimum allele frequency threshold for calling variants (Default: 0.25) --max_allele_freq [float] Maximum allele frequency threshold for filtering variant calls (Default: 0.75) + --varscan2_strand_filter [bool] Ignore Varscan 2 variants with >90% support on one strand (Default: true) --amplicon_left_suffix [str] Suffix used in name field of --amplicon_bed to indicate left primer position (Default: '_LEFT') --amplicon_right_suffix [str] Suffix used in name field of --amplicon_bed to indicate right primer position (Default: '_RIGHT') --save_align_intermeds [bool] Save the intermediate BAM files from the alignment steps (Default: false) @@ -261,6 +262,7 @@ if (!params.skip_variants) { summary['Min Read Depth'] = params.min_coverage summary['Min Allele Freq'] = params.min_allele_freq summary['Max Allele Freq'] = params.max_allele_freq + if (params.varscan2_strand_filter) summary['Varscan2 Strand Filter'] = 'Yes' if (params.save_align_intermeds) summary['Save Align Intermeds'] = 'Yes' if (params.save_mpileup) summary['Save mpileup'] = 'Yes' if (params.skip_markduplicates) summary['Skip MarkDuplicates'] = 'Yes' @@ -1252,7 +1254,7 @@ process SAMTOOLS_MPILEUP { ${bam[0]} """ } - +println(params.protocol != 'amplicon' && params.varscan2_strand_filter) /* * STEP 5.7.1: Variant calling with VarScan 2 */ @@ -1284,6 +1286,7 @@ process VARSCAN2 { script: prefix = "${sample}.AF${params.max_allele_freq}" + strand = params.protocol != 'amplicon' && params.varscan2_strand_filter ? 
"--strand-filter 1" : "--strand-filter 0" """ echo "$sample" > sample_name.list varscan mpileup2cns \\ @@ -1294,9 +1297,9 @@ process VARSCAN2 { --min-var-freq $params.min_allele_freq \\ --p-value 0.99 \\ --output-vcf 1 \\ - --strand-filter 0 \\ --vcf-sample-list sample_name.list \\ --variants \\ + $strand \\ 2> ${sample}.varscan2.log \\ | bgzip -c > ${sample}.vcf.gz tabix -p vcf -f ${sample}.vcf.gz diff --git a/nextflow.config b/nextflow.config index 8f24e167..2da70044 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,6 +51,7 @@ params { min_coverage = 10 min_allele_freq = 0.25 max_allele_freq = 0.75 + varscan2_strand_filter = true amplicon_left_suffix = '_LEFT' amplicon_right_suffix = '_RIGHT' save_align_intermeds = false From 72a22bc26cb094d428e2c46d1b552f11a8862d4d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 14:01:11 +0100 Subject: [PATCH 094/129] Bugfixes --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index cc692c8c..801e6372 100644 --- a/main.nf +++ b/main.nf @@ -316,7 +316,7 @@ if (params.email || params.email_on_fail) { summary['E-mail on failure'] = params.email_on_fail summary['MultiQC maxsize'] = params.max_multiqc_email_size } -log.info summary.collect { k,v -> "${k.padRight(21)}: $v" }.join("\n") +log.info summary.collect { k,v -> "${k.padRight(22)}: $v" }.join("\n") log.info "-\033[2m--------------------------------------------------\033[0m-" // Check the hostnames against configured profiles @@ -1254,7 +1254,7 @@ process SAMTOOLS_MPILEUP { ${bam[0]} """ } -println(params.protocol != 'amplicon' && params.varscan2_strand_filter) + /* * STEP 5.7.1: Variant calling with VarScan 2 */ From 0e99a68dd9baac9947c73e16258ca7e931dd2306 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 15:41:30 +0100 Subject: [PATCH 095/129] Ignore some processes based on real data --- main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.nf b/main.nf index 801e6372..10a818dc 
100644 --- a/main.nf +++ b/main.nf @@ -1066,6 +1066,7 @@ if (params.skip_markduplicates) { process PICARD_METRICS { tag "$sample" label 'process_medium' + label 'error_ignore' publishDir "${params.outdir}/variants/bam/picard_metrics", mode: params.publish_dir_mode when: @@ -1261,6 +1262,7 @@ process SAMTOOLS_MPILEUP { process VARSCAN2 { tag "$sample" label 'process_medium' + label 'error_ignore' publishDir "${params.outdir}/variants/varscan2", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith(".log")) "log/$filename" From 65da25e5ae399330f007d2ebed7e1dc9c5dcb79d Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 16:26:17 +0100 Subject: [PATCH 096/129] Ignore some more processes --- main.nf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main.nf b/main.nf index 10a818dc..7a2260eb 100644 --- a/main.nf +++ b/main.nf @@ -1434,6 +1434,7 @@ process VARSCAN2_SNPEFF { */ process VARSCAN2_QUAST { label 'process_medium' + label 'error_ignore' publishDir "${params.outdir}/variants/varscan2/quast", mode: params.publish_dir_mode when: @@ -1620,6 +1621,7 @@ process IVAR_SNPEFF { */ process IVAR_QUAST { label 'process_medium' + label 'error_ignore' publishDir "${params.outdir}/variants/ivar/quast", mode: params.publish_dir_mode when: @@ -1784,6 +1786,7 @@ process BCFTOOLS_SNPEFF { */ process BCFTOOLS_QUAST { label 'process_medium' + label 'error_ignore' publishDir "${params.outdir}/variants/bcftools", mode: params.publish_dir_mode when: From 33eb85c1af442fc2553b4d8d65b6d37cf843a6e1 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 18:34:42 +0100 Subject: [PATCH 097/129] Add --min_mapped_reads param and do some cool stuff with it --- CHANGELOG.md | 1 + docs/usage.md | 5 ++++ main.nf | 69 ++++++++++++++++++++++++++++++++++++++++++++++++- nextflow.config | 1 + 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f29f209c..ad6f8bb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 
+11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. * [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers * [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub * Parameters: + * `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads * `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter * `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output * `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` diff --git a/docs/usage.md b/docs/usage.md index 5b8ac889..5355f3c0 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -37,6 +37,7 @@ * [`--skip_kraken2`](#--skip_kraken2) * [Variant calling](#variant-calling) * [`--callers`](#-callers) + * [`--min_mapped_reads`](#-min_mapped_reads) * [`--ivar_trim_noprimer`](#--ivar_trim_noprimer) * [`--ivar_trim_min_len`](#--ivar_trim_min_len) * [`--ivar_trim_min_qual`](#--ivar_trim_min_qual) @@ -366,6 +367,10 @@ Skip Kraken 2 process for removing host classified reads (Default: false). Specify which variant calling algorithms you would like to use. Available options are `varscan2`, `ivar` and `bcftools` (Default: 'varscan2,ivar,bcftools'). +### `--min_mapped_reads` + +Minimum number of mapped reads below which samples are removed from further processing (Default: 1000). Some downstream steps in the pipeline will fail if this threshold is too low. + ### `--ivar_trim_noprimer` This option unsets the `-e` parameter in `ivar trim` to discard reads without primers (Default: false). 
diff --git a/main.nf b/main.nf index 7a2260eb..a1eb5f58 100644 --- a/main.nf +++ b/main.nf @@ -57,6 +57,7 @@ def helpMessage() { Variant calling --callers [str] Specify which variant calling algorithms you would like to use (Default: 'varscan2,ivar,bcftools') + --min_mapped_reads [int] Minimum number of mapped reads below which samples are removed from further processing (Default: 1000) --ivar_trim_noprimer [bool] Unset -e parameter for iVar trim. Reads with primers are included by default (Default: false) --ivar_trim_min_len [int] Minimum length of read to retain after trimming (Default: 20) --ivar_trim_min_qual [int] Minimum quality threshold for sliding window to pass (Default: 20) @@ -251,6 +252,7 @@ if (params.skip_amplicon_trimming) summary['Skip Amplicon Trimming'] = 'Yes' if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (!params.skip_variants) { summary['Variant Calling Tools'] = params.callers + summary['Min Mapped Reads'] = params.min_mapped_reads if (params.ivar_trim_noprimer) summary['iVar Trim Exclude'] = 'Yes' summary['iVar Trim Min Len'] = params.ivar_trim_min_len summary['iVar Trim Min Qual'] = params.ivar_trim_min_qual @@ -918,7 +920,7 @@ process SORT_BAM { tuple val(sample), val(single_end), path(bam) from ch_bowtie2_bam output: - tuple val(sample), val(single_end), path("*.sorted.{bam,bam.bai}") into ch_sort_bam + tuple val(sample), val(single_end), path("*.sorted.{bam,bam.bai}"), path("*.flagstat") into ch_sort_bam path "*.{flagstat,idxstats,stats}" into ch_sort_bam_flagstat_mqc script: @@ -931,6 +933,43 @@ process SORT_BAM { """ } +// Get total number of mapped reads from flagstat file +def get_mapped_from_flagstat(flagstat) { + def mapped = 0 + flagstat.eachLine { line -> + if (line.contains(' mapped (')) { + mapped = line.tokenize().first().toInteger() + } + } + return mapped +} + +// Function that checks the number of mapped reads from flagstat output +// and returns true if > params.min_mapped_reads and otherwise false 
+pass_mapped_reads = [:] +fail_mapped_reads = [:] +def check_mapped(sample,flagstat,min_mapped_reads=500) { + mapped = get_mapped_from_flagstat(flagstat) + c_reset = params.monochrome_logs ? '' : "\033[0m"; + c_green = params.monochrome_logs ? '' : "\033[0;32m"; + c_red = params.monochrome_logs ? '' : "\033[0;31m"; + if (mapped < min_mapped_reads.toInteger()) { + log.info "#${c_red}################### FAILED MAPPED READ THRESHOLD! IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! ($sample) >> ${mapped} <<${c_reset}" + fail_mapped_reads[sample] = mapped + return false + } else { + //log.info "-${c_green} Passed mapped read threshold > bowtie2 ($sample) >> ${mapped} <<${c_reset}" + pass_mapped_reads[sample] = mapped + return true + } +} + +// Remove samples that failed mapped read threshold +ch_sort_bam + .filter { sample, single_end, bam, flagstat -> check_mapped(sample,flagstat,params.min_mapped_reads) } + .map { it[0..2] } + .set { ch_sort_bam } + /* * STEP 5.3: Trim amplicon sequences with iVar */ @@ -3206,6 +3245,7 @@ process get_software_versions { * STEP 7: MultiQC */ process MULTIQC { + label 'process_medium' publishDir "${params.outdir}", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith("assembly_metrics_mqc.tsv")) "assembly/$filename" @@ -3298,6 +3338,9 @@ workflow.onComplete { // Set up the e-mail variables def subject = "[nf-core/viralrecon] Successful: $workflow.runName" + if (fail_mapped_reads.size() > 0) { + subject = "[nf-core/viralrecon] Partially Successful (${fail_mapped_reads.size()} skipped): $workflow.runName" + } if (!workflow.success) { subject = "[nf-core/viralrecon] FAILED: $workflow.runName" } @@ -3320,6 +3363,8 @@ workflow.onComplete { if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = 
workflow.revision + email_fields['fail_mapped_reads'] = fail_mapped_reads.keySet() + email_fields['min_mapped_reads'] = params.min_mapped_reads email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp @@ -3390,6 +3435,28 @@ workflow.onComplete { c_red = params.monochrome_logs ? '' : "\033[0;31m"; c_reset = params.monochrome_logs ? '' : "\033[0m"; + if (pass_mapped_reads.size() > 0) { + idx = 0 + sample_mapped = '' + total_count = pass_mapped_reads.size() + fail_mapped_reads.size() + for (sample in pass_mapped_reads) { + sample_mapped += " ${sample.key}: ${sample.value}\n" + idx += 1 + if (idx > 5) { + sample_mapped += " ..see pipeline reports for full list\n" + break + } + } + //log.info "[${c_purple}nf-core/viralrecon${c_reset}] ${c_green}${pass_mapped_reads.size()}/${total_count} samples passed minimum mapped reads check\n${sample_mapped}${c_reset}" + } + if (fail_mapped_reads.size() > 0) { + sample_mapped = '' + fail_mapped_reads.each { sample, value -> + sample_mapped += " ${sample}: ${value}\n" + } + log.info "[${c_purple}nf-core/viralrecon${c_reset}] ${c_red} WARNING - ${fail_mapped_reads.size()} samples skipped due to low number of mapped reads!\n${sample_mapped}${c_reset}" + } + if (workflow.stats.ignoredCount > 0 && workflow.success) { log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" diff --git a/nextflow.config b/nextflow.config index 2da70044..e65e8d3f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -40,6 +40,7 @@ params { // Options: Variant calling callers = 'varscan2,ivar,bcftools' + min_mapped_reads = 1000 ivar_trim_noprimer = false ivar_trim_min_len = 20 ivar_trim_min_qual = 20 From bffe8ff9170bdc4faf16f514f4c9806c92e6e69b Mon Sep 17 
00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 19:23:01 +0100 Subject: [PATCH 098/129] Remove ignore processes --- main.nf | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/main.nf b/main.nf index a1eb5f58..7a9c243b 100644 --- a/main.nf +++ b/main.nf @@ -253,7 +253,7 @@ if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' if (!params.skip_variants) { summary['Variant Calling Tools'] = params.callers summary['Min Mapped Reads'] = params.min_mapped_reads - if (params.ivar_trim_noprimer) summary['iVar Trim Exclude'] = 'Yes' + if (params.ivar_trim_noprimer) summary['iVar Trim Exclude'] = 'Yes' summary['iVar Trim Min Len'] = params.ivar_trim_min_len summary['iVar Trim Min Qual'] = params.ivar_trim_min_qual summary['iVar Trim Window'] = params.ivar_trim_window_width @@ -954,7 +954,7 @@ def check_mapped(sample,flagstat,min_mapped_reads=500) { c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (mapped < min_mapped_reads.toInteger()) { - log.info "#${c_red}################### FAILED MAPPED READ THRESHOLD! IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! ($sample) >> ${mapped} <<${c_reset}" + log.info "#${c_red}#### $sample >> ${mapped} << FAILED MAPPED READ THRESHOLD OF ${params.min_mapped_reads}! IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! 
${c_reset}" fail_mapped_reads[sample] = mapped return false } else { @@ -1105,7 +1105,6 @@ if (params.skip_markduplicates) { process PICARD_METRICS { tag "$sample" label 'process_medium' - label 'error_ignore' publishDir "${params.outdir}/variants/bam/picard_metrics", mode: params.publish_dir_mode when: @@ -1301,7 +1300,6 @@ process SAMTOOLS_MPILEUP { process VARSCAN2 { tag "$sample" label 'process_medium' - label 'error_ignore' publishDir "${params.outdir}/variants/varscan2", mode: params.publish_dir_mode, saveAs: { filename -> if (filename.endsWith(".log")) "log/$filename" @@ -1473,7 +1471,6 @@ process VARSCAN2_SNPEFF { */ process VARSCAN2_QUAST { label 'process_medium' - label 'error_ignore' publishDir "${params.outdir}/variants/varscan2/quast", mode: params.publish_dir_mode when: @@ -1660,7 +1657,6 @@ process IVAR_SNPEFF { */ process IVAR_QUAST { label 'process_medium' - label 'error_ignore' publishDir "${params.outdir}/variants/ivar/quast", mode: params.publish_dir_mode when: @@ -1825,7 +1821,6 @@ process BCFTOOLS_SNPEFF { */ process BCFTOOLS_QUAST { label 'process_medium' - label 'error_ignore' publishDir "${params.outdir}/variants/bcftools", mode: params.publish_dir_mode when: From 0f87cd63d4a737ef8a2c5e1d27afb9422a41677f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Fri, 19 Jun 2020 20:17:10 +0100 Subject: [PATCH 099/129] Update Zenodo DOI --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7d708da0..fb35f16d 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ [![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions) [![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions) [![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) 
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3872730.svg)](https://doi.org/10.5281/zenodo.3872730) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3901628.svg)](https://doi.org/10.5281/zenodo.3901628) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) [![Docker](https://img.shields.io/docker/automated/nfcore/viralrecon.svg)](https://hub.docker.com/r/nfcore/viralrecon) @@ -144,7 +144,7 @@ For further information or help, don't hesitate to get in touch on [Slack `#vira ## Citation -If you use nf-core/viralrecon for your analysis, please cite it using the following doi: [10.5281/zenodo.3872730](https://doi.org/10.5281/zenodo.3872730) +If you use nf-core/viralrecon for your analysis, please cite it using the following doi: [10.5281/zenodo.3901628](https://doi.org/10.5281/zenodo.3901628) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](https://github.com/nf-core/viralrecon/blob/master/CITATIONS.md) file. From 0616e2806c0bf33f64833bb2f8c7c25941cb5123 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 09:35:26 +0100 Subject: [PATCH 100/129] No idea what I did here..will check commit --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 7a9c243b..015aa2ef 100644 --- a/main.nf +++ b/main.nf @@ -954,7 +954,7 @@ def check_mapped(sample,flagstat,min_mapped_reads=500) { c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (mapped < min_mapped_reads.toInteger()) { - log.info "#${c_red}#### $sample >> ${mapped} << FAILED MAPPED READ THRESHOLD OF ${params.min_mapped_reads}! IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! ${c_reset}" + log.info "#${c_red}#### $sample FAILED MAPPED READ THRESHOLD - ${mapped} < ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! 
${c_reset}" fail_mapped_reads[sample] = mapped return false } else { @@ -3363,7 +3363,7 @@ workflow.onComplete { email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - + println(fail_mapped_reads.keySet()) // On success try attach the multiqc report def mqc_report = null try { From 5c3ae0e161185894d21ad3d2a0d968ef5c355b64 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 10:43:56 +0100 Subject: [PATCH 101/129] Add Jerome and Rob --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index fb35f16d..8994e296 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | | [Joao Curado](https://github.com/jcurado-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | +| [Jerome Nicod](https://github.com/Jeromics) | [The Francis Crick Institute, UK](https://www.crick.ac.uk) | | [Jose Espinosa-Carrasco](https://github.com/JoseEspinosa) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Katrin Sameith](https://github.com/ktrns) | [DRESDEN-concept Genome Center, Germany](https://genomecenter.tu-dresden.de) | | [Lluc Cabus](https://github.com/lcabus-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | @@ -130,6 +131,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Michael Heuer](https://github.com/heuermh) | [UC Berkeley, USA](https://https://rise.cs.berkeley.edu) | | [Phil Ewels](https://github.com/ewels) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | | [Richard 
Mitter](https://github.com/rjmitter) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | +| [Robert Goldstone](https://github.com/rjgoldstone) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | | [Simon Heumos](https://github.com/subwaystation) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | | [Stephen Kelly](https://github.com/stevekm) | [Memorial Sloan Kettering Cancer Center, USA](https://www.mskcc.org/) | | [Thanh Le Viet](https://github.com/thanhleviet) | [Quadram Institute, UK](https://quadram.ac.uk/) | From 8a78b5b2d221b13bdba6c51a34a3c617673639fc Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 10:44:07 +0100 Subject: [PATCH 102/129] Add failed samples to email --- assets/email_template.html | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/assets/email_template.html b/assets/email_template.html index 7dcbc3b8..ca2c3685 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -16,7 +16,7 @@

    nf-core/viralrecon v${version}

    Run Name: $runName

    -<% if (!success){ +<% if (!success) { out << """

    nf-core/viralrecon execution completed unsuccessfully!

    @@ -25,6 +25,17 @@

    nf-core/viralrecon execution completed
    ${errorReport}

    """ +} else if (fail_mapped_reads.size() > 0) { + out << """ +
    +

    nf-core/viralrecon execution completed with warnings!

    +

    The pipeline finished successfully, but the following samples were skipped due to failing the minimum mapped read threshold (< ${min_mapped_reads}):

    +
      +
    • ${fail_mapped_reads.sort().join('
    • ')}
    • +
    +

    +

    + """ } else { out << """
    From 587f47ca7f3dc183a4f536cf5371d7a63c4f7c03 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 10:44:16 +0100 Subject: [PATCH 103/129] Add failed samples to pipeline report --- assets/email_template.txt | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/assets/email_template.txt b/assets/email_template.txt index f7235df3..e4a09c92 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -9,19 +9,31 @@ Run Name: $runName -<% if (success){ - out << "## nf-core/viralrecon execution completed successfully! ##" -} else { +<% if (!success){ out << """#################################################### -## nf-core/viralrecon execution completed unsuccessfully! ## -#################################################### +## nf-core/viralrecon completed unsuccessfully! ## +####################################################\n The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: ${errorReport} """ -} %> +} else if (fail_mapped_reads.size() > 0) { + out << """#################################################### +## nf-core/viralrecon completed with warnings! ## +####################################################\n +The pipeline finished successfully, but the following samples were skipped +due to failing the minimum mapped read threshold (less than ${min_mapped_reads}): + - ${fail_mapped_reads.sort().join("\n - ")} +""" +} else { + out << """#################################################### +## nf-core/viralrecon completed successfully! 
## +####################################################\n +""" +} +%> The workflow was completed at $dateComplete (duration: $duration) @@ -30,11 +42,11 @@ The command used to launch the workflow was as follows: $commandLine - Pipeline Configuration: ----------------------- <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + -- nf-core/viralrecon https://github.com/nf-core/viralrecon From 0136b0a8552ef66e1ee10cf6dbdb8041efd94db4 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 10:44:31 +0100 Subject: [PATCH 104/129] Tweaks for mapped read threshold code --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 015aa2ef..d7854aa9 100644 --- a/main.nf +++ b/main.nf @@ -954,7 +954,7 @@ def check_mapped(sample,flagstat,min_mapped_reads=500) { c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (mapped < min_mapped_reads.toInteger()) { - log.info "#${c_red}#### $sample FAILED MAPPED READ THRESHOLD - ${mapped} < ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! ${c_reset}" + log.info "${c_red}>>>> $sample FAILED MAPPED READ THRESHOLD: ${mapped} < ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! 
<<<<${c_reset}" fail_mapped_reads[sample] = mapped return false } else { @@ -3363,7 +3363,7 @@ workflow.onComplete { email_fields['summary']['Nextflow Version'] = workflow.nextflow.version email_fields['summary']['Nextflow Build'] = workflow.nextflow.build email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - println(fail_mapped_reads.keySet()) + // On success try attach the multiqc report def mqc_report = null try { From d6b598d15d583856b4c46c528e2a0ecb9fb20be8 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 10:57:46 +0100 Subject: [PATCH 105/129] Update spacing --- assets/email_template.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/email_template.txt b/assets/email_template.txt index e4a09c92..9b78bb18 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -11,7 +11,7 @@ Run Name: $runName <% if (!success){ out << """#################################################### -## nf-core/viralrecon completed unsuccessfully! ## +## nf-core/viralrecon completed unsuccessfully! ## ####################################################\n The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: @@ -29,7 +29,7 @@ due to failing the minimum mapped read threshold (less than ${min_mapped_reads}) """ } else { out << """#################################################### -## nf-core/viralrecon completed successfully! ## +## nf-core/viralrecon completed successfully! 
## ####################################################\n """ } From cd803304267c33a4d0e433ca4eec4a852403475b Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 11:11:25 +0100 Subject: [PATCH 106/129] Update container tags --- .github/workflows/ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 35f8ec7f..36f2f8c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - name: Build new docker image if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/viralrecon:dev + run: docker build --no-cache . -t nfcore/viralrecon:1.1.0 - name: Pull docker image if: ${{ !env.GIT_DIFF }} @@ -75,7 +75,7 @@ jobs: - name: Build new docker image if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/viralrecon:dev + run: docker build --no-cache . -t nfcore/viralrecon:1.1.0 - name: Pull docker image if: ${{ !env.GIT_DIFF }} @@ -115,7 +115,7 @@ jobs: - name: Build new docker image if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/viralrecon:dev + run: docker build --no-cache . -t nfcore/viralrecon:1.1.0 - name: Pull docker image if: ${{ !env.GIT_DIFF }} @@ -155,7 +155,7 @@ jobs: - name: Build new docker image if: env.GIT_DIFF - run: docker build --no-cache . -t nfcore/viralrecon:dev + run: docker build --no-cache . -t nfcore/viralrecon:1.1.0 - name: Pull docker image if: ${{ !env.GIT_DIFF }} From a5e252cedba83e4b5b8250d1f109d7ddb0db1e65 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 11:16:44 +0100 Subject: [PATCH 107/129] Update report --- docs/html/multiqc_report.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/html/multiqc_report.html b/docs/html/multiqc_report.html index 2d2c05b2..a0309e10 100644 --- a/docs/html/multiqc_report.html +++ b/docs/html/multiqc_report.html @@ -13241,7 +13241,7 @@

    nf-core/viralrecon Software Versions

    -
    nf-core/viralrecon
    v1.1.0dev
    +
    nf-core/viralrecon
    v1.1.0
    Nextflow
    v20.01.0
    parallel-fastq-dump
    v0.6.6
    FastQC
    v0.11.9
    @@ -13316,7 +13316,7 @@

    nf-core/viralrecon Workflow Summary

    Assembly Tools
    spades,metaspades,unicycler,minia
    Minia Kmer Size
    31
    Max Resources
    224 GB memory, 32 cpus, 3d time per job
    -
    Container
    singularity - nfcore-viralrecon-dev.img
    +
    Container
    singularity - nfcore-viralrecon-1.1.0.img
    Output dir
    ./results
    Publish dir mode
    copy
    Launch dir
    nfcore/viralrecon/test_full
    From 4691cd787bc58980bc22fc365e03c821690f77d6 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 11:18:34 +0100 Subject: [PATCH 108/129] Fix markdownlint --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8994e296..1a1e8166 100644 --- a/README.md +++ b/README.md @@ -122,7 +122,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | | [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | | [Joao Curado](https://github.com/jcurado-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | -| [Jerome Nicod](https://github.com/Jeromics) | [The Francis Crick Institute, UK](https://www.crick.ac.uk) | +| [Jerome Nicod](https://github.com/Jeromics) | [The Francis Crick Institute, UK](https://www.crick.ac.uk) | | [Jose Espinosa-Carrasco](https://github.com/JoseEspinosa) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | | [Katrin Sameith](https://github.com/ktrns) | [DRESDEN-concept Genome Center, Germany](https://genomecenter.tu-dresden.de) | | [Lluc Cabus](https://github.com/lcabus-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | From 36cf2b07240139ac5a3f5b909ce2b03f6a7cfa97 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 13:36:54 +0100 Subject: [PATCH 109/129] Update MultiQC config for efficiency --- assets/multiqc_config.yaml | 205 +++++++++++++++++++++++-------------- 1 file changed, 129 insertions(+), 76 deletions(-) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 49bc1e70..07ab4b27 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -24,29 +24,50 @@ run_modules: exclude_modules: - 'general_stats' +# See 
https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + summary_variants_metrics: + section_name: 'Variant calling metrics' + description: 'generated by the nf-core/viralrecon pipeline' + plot_type: 'table' + pconfig: + id: 'summary_variants_metrics_plot' + table_title: 'Variant calling metrics' + namespace: 'Variant calling metrics' + format: '{:.0f}' + summary_assembly_metrics: + section_name: 'De novo assembly metrics' + description: 'generated by the nf-core/viralrecon pipeline' + plot_type: 'table' + pconfig: + id: 'summary_assembly_metrics_plot' + table_title: 'De novo assembly metrics' + namespace: 'De novo assembly metrics' + format: '{:.0f}' + module_order: - fastqc: name: 'PREPROCESS: FastQC (raw reads)' anchor: 'fastqc_raw' info: 'This section of the report shows FastQC results for the raw reads before adapter trimming.' path_filters: - - './fastqc/*' + - './fastqc/*.zip' - fastp: name: 'PREPROCESS: fastp (adapter trimming)' info: 'This section of the report shows fastp results for reads after adapter and quality trimming.' - path_filters: - - './fastp/log/*' + #path_filters: + # - './fastp/log/*.json' - fastqc: name: 'PREPROCESS: FastQC (adapter trimming)' anchor: 'fastqc_fastp' info: 'This section of the report shows FastQC results for reads after adapter and quality trimming.' path_filters: - - './fastp/fastqc/*' + - './fastp/fastqc/*.zip' - bowtie2: name: 'VARIANTS: Bowtie 2' info: 'This section of the report shows Bowtie 2 mapping results for reads after adapter trimming and quality trimming.' - path_filters: - - './bowtie2/log/*' + #path_filters: + # - './bowtie2/log/*.log' - samtools: name: 'VARIANTS: SAMTools (raw)' anchor: 'samtools_bowtie2' @@ -56,8 +77,8 @@ module_order: - ivar: name: 'VARIANTS: iVar trim' info: 'This section of the report shows counts observed for each amplicon primer per sample as detected by iVar trim.' 
- path_filters: - - './ivar/trim/log/*' + # path_filters: + # - './ivar/trim/log/*.log' - samtools: name: 'VARIANTS: SAMTools (iVar)' anchor: 'samtools_ivar' @@ -73,160 +94,160 @@ module_order: - picard: name: 'VARIANTS: Picard Metrics' info: 'This section of the report shows picard CollectMultipleMetrics and MarkDuplicates results after mapping (if "--protocol amplicon" this will be after primer sequence removal with iVar).' - path_filters: - - './picard/metrics/*' + #path_filters: + # - './picard/metrics/*' - mosdepth: name: 'VARIANTS: mosdepth' info: 'This section of the report shows genome-wide coverage metrics generated by mosdepth.' - path_filters: - - './mosdepth/genome/*' + # path_filters: + # - './mosdepth/genome/*' - varscan2: name: 'VARIANTS: VarScan 2' info: 'This section of the report shows total number of variants called by VarScan 2 broken down by those that were reported or not.' - path_filters: - - './varscan2/counts/lowfreq/*' + #path_filters: + # - './varscan2/counts/lowfreq/*' - bcftools: name: 'VARIANTS: BCFTools (VarScan 2; high freq)' anchor: 'bcftools_varscan2' info: 'This section of the report shows BCFTools stats results for high frequency variants called by VarScan 2. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' path_filters: - - './varscan2/bcftools/highfreq/*' + - './varscan2/bcftools/highfreq/*.txt' - snpeff: name: 'VARIANTS: SnpEff (VarScan 2; high freq)' anchor: 'snpeff_varscan2' info: 'This section of the report shows SnpEff results for high frequency variants called by VarScan 2. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' 
path_filters: - - './varscan2/snpeff/highfreq/*' + - './varscan2/snpeff/highfreq/*.csv' - quast: name: 'VARIANTS: QUAST (VarScan 2; high freq)' anchor: 'quast_varscan2' info: 'This section of the report shows QUAST results for consensus sequences generated from high frequency variants with VarScan 2. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' path_filters: - - './varscan2/quast/highfreq/*' + - './varscan2/quast/highfreq/*.tsv' - bcftools: name: 'VARIANTS: BCFTools (iVar; high freq)' anchor: 'bcftools_ivar' info: 'This section of the report shows BCFTools stats results for high frequency variants called by iVar. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' path_filters: - - './ivar/variants/bcftools/highfreq/*' + - './ivar/variants/bcftools/highfreq/*.txt' - snpeff: name: 'VARIANTS: SnpEff (iVar; high freq)' anchor: 'snpeff_ivar' info: 'This section of the report shows SnpEff results for high frequency variants called by iVar. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' path_filters: - - './ivar/variants/snpeff/highfreq/*' + - './ivar/variants/snpeff/highfreq/*.csv' - quast: name: 'VARIANTS: QUAST (iVar; high freq)' anchor: 'quast_ivar' info: 'This section of the report shows QUAST results for consensus sequences generated from high frequency variants with iVar. The allele frequency filtering threshold can be set by the --max_allele_freq parameter (Default: 0.8).' path_filters: - - './ivar/consensus/quast/highfreq/*' + - './ivar/consensus/quast/highfreq/*.tsv' - bcftools: name: 'VARIANTS: BCFTools (BCFTools)' anchor: 'bcftools_bcftools' info: 'This section of the report shows BCFTools stats results for variants called by BCFTools.' 
path_filters: - - './bcftools/variants/bcftools/*' + - './bcftools/variants/bcftools/*.txt' - snpeff: name: 'VARIANTS: SnpEff (BCFTools)' anchor: 'snpeff_bcftools' info: 'This section of the report shows SnpEff results for variants called by BCFTools.' path_filters: - - './bcftools/variants/snpeff/*' + - './bcftools/variants/snpeff/*.csv' - quast: name: 'VARIANTS: QUAST (BCFTools)' anchor: 'quast_bcftools' info: 'This section of the report shows QUAST results for consensus sequence generated from BCFTools variants.' path_filters: - - './bcftools/consensus/quast/*' + - './bcftools/consensus/quast/*.tsv' - cutadapt: name: 'ASSEMBLY: Cutadapt (primer trimming)' info: 'This section of the report shows Cutadapt results for reads after primer sequence trimming.' - path_filters: - - './cutadapt/log/*' + # path_filters: + # - './cutadapt/log/*.log' - fastqc: name: 'ASSEMBLY: FastQC (primer trimming)' anchor: 'fastqc_cutadapt' info: 'This section of the report shows FastQC results for reads after primer sequence trimming with Cutadapt.' path_filters: - - './cutadapt/fastqc/*' + - './cutadapt/fastqc/*.zip' - kraken: name: 'ASSEMBLY: Kraken 2' info: 'This section of the report shows Kraken 2 classification results for reads after primer sequence trimming with Cutadapt.' - path_filters: - - './kraken2/*' + #path_filters: + # - './kraken2/*' - quast: name: 'ASSEMBLY: QUAST (SPAdes)' anchor: 'quast_spades' info: 'This section of the report shows QUAST results from SPAdes de novo assembly.' path_filters: - - './spades/quast/*' + - './spades/quast/*.tsv' - bcftools: name: 'ASSEMBLY: BCFTools (SPAdes)' anchor: 'bcftools_spades' info: 'This section of the report shows BCFTools stats results for variants called in the SPAdes assembly relative to the reference.' 
path_filters: - - './spades/bcftools/*' + - './spades/bcftools/*.txt' - snpeff: name: 'ASSEMBLY: SnpEff (SPAdes)' anchor: 'snpeff_spades' info: 'This section of the report shows SnpEff results for variants called in the SPAdes assembly relative to the reference.' path_filters: - - './spades/snpeff/*' + - './spades/snpeff/*.csv' - quast: name: 'ASSEMBLY: QUAST (MetaSPAdes)' anchor: 'quast_metaspades' info: 'This section of the report shows QUAST results from MetaSPAdes de novo assembly.' path_filters: - - './metaspades/quast/*' + - './metaspades/quast/*.tsv' - bcftools: name: 'ASSEMBLY: BCFTools (MetaSPAdes)' anchor: 'bcftools_metaspades' info: 'This section of the report shows BCFTools stats results for variants called in the MetaSPAdes assembly relative to the reference.' path_filters: - - './metaspades/bcftools/*' + - './metaspades/bcftools/*.txt' - snpeff: name: 'ASSEMBLY: SnpEff (MetaSPAdes)' anchor: 'snpeff_metaspades' info: 'This section of the report shows SnpEff results for variants called in the MetaSPAdes assembly relative to the reference.' path_filters: - - './metaspades/snpeff/*' + - './metaspades/snpeff/*.csv' - quast: name: 'ASSEMBLY: QUAST (Unicycler)' anchor: 'quast_unicycler' info: 'This section of the report shows QUAST results from Unicycler de novo assembly.' path_filters: - - './unicycler/quast/*' + - './unicycler/quast/*.tsv' - bcftools: name: 'ASSEMBLY: BCFTools (Unicycler)' anchor: 'bcftools_unicycler' info: 'This section of the report shows BCFTools stats results for variants called in the Unicycler assembly relative to the reference.' path_filters: - - './unicycler/bcftools/*' + - './unicycler/bcftools/*.txt' - snpeff: name: 'ASSEMBLY: SnpEff (Unicycler)' anchor: 'snpeff_unicycler' info: 'This section of the report shows SnpEff results for variants called in the Unicycler assembly relative to the reference.' 
path_filters: - - './unicycler/snpeff/*' + - './unicycler/snpeff/*.csv' - quast: name: 'ASSEMBLY: QUAST (minia)' anchor: 'quast_minia' info: 'This section of the report shows QUAST results from minia de novo assembly.' path_filters: - - './minia/quast/*' + - './minia/quast/*.tsv' - bcftools: name: 'ASSEMBLY: BCFTools (minia)' anchor: 'bcftools_minia' info: 'This section of the report shows BCFTools stats results for variants called in the minia assembly relative to the reference.' path_filters: - - './minia/bcftools/*' + - './minia/bcftools/*.txt' - snpeff: name: 'ASSEMBLY: SnpEff (minia)' anchor: 'snpeff_minia' info: 'This section of the report shows SnpEff results for variants called in the minia assembly relative to the reference.' path_filters: - - './minia/snpeff/*' + - './minia/snpeff/*.csv' report_section_order: summary_assembly_metrics: @@ -238,45 +259,77 @@ report_section_order: nf-core-viralrecon-summary: order: -1002 +bcftools: + collapse_complementary_changes: true + custom_plot_config: picard_insert_size: cpswitch_c_active: False smooth_points: 1000 -bcftools: - collapse_complementary_changes: true - -# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml -custom_data: - summary_variants_metrics: - section_name: 'Variant calling metrics' - description: 'generated by the nf-core/viralrecon pipeline' - plot_type: 'table' - pconfig: - id: 'summary_variants_metrics_plot' - table_title: 'Variant calling metrics' - namespace: 'Variant calling metrics' - format: '{:.0f}' - summary_assembly_metrics: - section_name: 'De novo assembly metrics' - description: 'generated by the nf-core/viralrecon pipeline' - plot_type: 'table' - pconfig: - id: 'summary_assembly_metrics_plot' - table_title: 'De novo assembly metrics' - namespace: 'De novo assembly metrics' - format: '{:.0f}' - extra_fn_clean_exts: - - '.trim' - - '.bowtie2' - - '.mkD' - - '.ptrim' - - '.highfreq' - - '.lowfreq' - - 
'.consensus' - - '.snpEff' - - '.scaffolds' - - '.kraken2' - - type: regex - pattern: '.(AF|k)[0-9]+.*' + - '.trim' + - '.bowtie2' + - '.mkD' + - '.ptrim' + - '.highfreq' + - '.lowfreq' + - '.consensus' + - '.snpEff' + - '.scaffolds' + - '.kraken2' + - type: regex + pattern: '.(AF|k)[0-9]+.*' + +# # Customise the module search patterns to speed up execution time +# # - Skip module sub-tools that we are not interested in +# # - Replace file-content searching with filename pattern searching +# # - Don't add anything that is the same as the MultiQC default +# # See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + fastp: + fn: './fastp/log/*.json' + bowtie2: + fn: './bowtie2/log/*.log' + ivar/trim: + fn: './ivar/trim/log/*.log' + mosdepth/global_dist: + fn: './mosdepth/genome/*.dist.txt' + varscan2/mpileup2cns: + fn: './varscan2/counts/lowfreq/*.log' + cutadapt: + fn: './cutadapt/log/*.log' + kraken: + fn: './kraken2/*.txt' + picard/alignment_metrics: + fn: './picard/metrics/*.alignment_summary_metrics' + picard/insertsize: + fn: './picard/metrics/*.insert_size_metrics' + picard/markdups: + fn: './picard/metrics/*.MarkDuplicates.metrics.txt' + picard/wgs_metrics: + fn: './picard/metrics/*.coverage_metrics' + picard/basedistributionbycycle: + skip: true + picard/gcbias: + skip: true + picard/hsmetrics: + skip: true + picard/oxogmetrics: + skip: true + picard/pcr_metrics: + skip: true + picard/quality_by_cycle: + skip: true + picard/quality_score_distribution: + skip: true + picard/quality_yield_metrics: + skip: true + picard/rnaseqmetrics: + skip: true + picard/rrbs_metrics: + skip: true + picard/sam_file_validation: + skip: true + picard/variant_calling_metrics: + skip: true From c53fcab8fbdeb68a3333a58d00d9193d4cba3ff2 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 13:37:12 +0100 Subject: [PATCH 110/129] Only stage required files for MultiQC --- main.nf | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 
deletions(-) diff --git a/main.nf b/main.nf index d7854aa9..f3c09576 100644 --- a/main.nf +++ b/main.nf @@ -743,8 +743,9 @@ if (!params.skip_adapter_trimming) { tuple val(sample), val(single_end), path("*.trim.fastq.gz") into ch_fastp_bowtie2, ch_fastp_cutadapt, ch_fastp_kraken2 - path "*.{log,fastp.html,json}" into ch_fastp_mqc + path "*.json" into ch_fastp_mqc path "*_fastqc.{zip,html}" into ch_fastp_fastqc_mqc + path "*.{log,fastp.html}" path "*.fail.fastq.gz" script: @@ -1482,7 +1483,8 @@ process VARSCAN2_QUAST { path gff from ch_gff output: - path "AF${params.max_allele_freq}" into ch_varscan2_quast_mqc + path "AF${params.max_allele_freq}" + path "AF${params.max_allele_freq}/report.tsv" into ch_varscan2_quast_mqc script: features = params.gff ? "--features $gff" : "" @@ -1668,7 +1670,8 @@ process IVAR_QUAST { path gff from ch_gff output: - path "AF${params.max_allele_freq}" into ch_ivar_quast_mqc + path "AF${params.max_allele_freq}" + path "AF${params.max_allele_freq}/report.tsv" into ch_ivar_quast_mqc script: features = params.gff ? "--features $gff" : "" @@ -1832,7 +1835,8 @@ process BCFTOOLS_QUAST { path gff from ch_gff output: - path "quast" into ch_bcftools_quast_mqc + path "quast" + path "quast/report.tsv" into ch_bcftools_quast_mqc script: features = params.gff ? "--features $gff" : "" @@ -2218,7 +2222,8 @@ process SPADES_QUAST { path gff from ch_gff output: - path "quast" into ch_quast_spades_mqc + path "quast" + path "quast/report.tsv" into ch_quast_spades_mqc script: features = params.gff ? "--features $gff" : "" @@ -2499,7 +2504,8 @@ process METASPADES_QUAST { path gff from ch_gff output: - path "quast" into ch_quast_metaspades_mqc + path "quast" + path "quast/report.tsv" into ch_quast_metaspades_mqc script: features = params.gff ? 
"--features $gff" : "" @@ -2778,7 +2784,8 @@ process UNICYCLER_QUAST { path gff from ch_gff output: - path "quast" into ch_quast_unicycler_mqc + path "quast" + path "quast/report.tsv" into ch_quast_unicycler_mqc script: features = params.gff ? "--features $gff" : "" @@ -3046,7 +3053,8 @@ process MINIA_QUAST { path gff from ch_gff output: - path "quast" into ch_quast_minia_mqc + path "quast" + path "quast/report.tsv" into ch_quast_minia_mqc script: features = params.gff ? "--features $gff" : "" From c5a7fe15f511bc8d81231f31edf7eff8038a9dab Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 14:29:58 +0100 Subject: [PATCH 111/129] Remove dir paths from fn --- assets/multiqc_config.yaml | 38 +++++++++++--------------------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 07ab4b27..4d2a0664 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -55,8 +55,6 @@ module_order: - fastp: name: 'PREPROCESS: fastp (adapter trimming)' info: 'This section of the report shows fastp results for reads after adapter and quality trimming.' - #path_filters: - # - './fastp/log/*.json' - fastqc: name: 'PREPROCESS: FastQC (adapter trimming)' anchor: 'fastqc_fastp' @@ -66,8 +64,6 @@ module_order: - bowtie2: name: 'VARIANTS: Bowtie 2' info: 'This section of the report shows Bowtie 2 mapping results for reads after adapter trimming and quality trimming.' - #path_filters: - # - './bowtie2/log/*.log' - samtools: name: 'VARIANTS: SAMTools (raw)' anchor: 'samtools_bowtie2' @@ -77,8 +73,6 @@ module_order: - ivar: name: 'VARIANTS: iVar trim' info: 'This section of the report shows counts observed for each amplicon primer per sample as detected by iVar trim.' 
- # path_filters: - # - './ivar/trim/log/*.log' - samtools: name: 'VARIANTS: SAMTools (iVar)' anchor: 'samtools_ivar' @@ -94,18 +88,12 @@ module_order: - picard: name: 'VARIANTS: Picard Metrics' info: 'This section of the report shows picard CollectMultipleMetrics and MarkDuplicates results after mapping (if "--protocol amplicon" this will be after primer sequence removal with iVar).' - #path_filters: - # - './picard/metrics/*' - mosdepth: name: 'VARIANTS: mosdepth' info: 'This section of the report shows genome-wide coverage metrics generated by mosdepth.' - # path_filters: - # - './mosdepth/genome/*' - varscan2: name: 'VARIANTS: VarScan 2' info: 'This section of the report shows total number of variants called by VarScan 2 broken down by those that were reported or not.' - #path_filters: - # - './varscan2/counts/lowfreq/*' - bcftools: name: 'VARIANTS: BCFTools (VarScan 2; high freq)' anchor: 'bcftools_varscan2' @@ -163,8 +151,6 @@ module_order: - cutadapt: name: 'ASSEMBLY: Cutadapt (primer trimming)' info: 'This section of the report shows Cutadapt results for reads after primer sequence trimming.' - # path_filters: - # - './cutadapt/log/*.log' - fastqc: name: 'ASSEMBLY: FastQC (primer trimming)' anchor: 'fastqc_cutadapt' @@ -174,8 +160,6 @@ module_order: - kraken: name: 'ASSEMBLY: Kraken 2' info: 'This section of the report shows Kraken 2 classification results for reads after primer sequence trimming with Cutadapt.' 
- #path_filters: - # - './kraken2/*' - quast: name: 'ASSEMBLY: QUAST (SPAdes)' anchor: 'quast_spades' @@ -288,27 +272,27 @@ extra_fn_clean_exts: # # See https://multiqc.info/docs/#optimise-file-search-patterns for details sp: fastp: - fn: './fastp/log/*.json' + fn: '*.fastp.json' bowtie2: - fn: './bowtie2/log/*.log' + fn: '*.bowtie2.log' ivar/trim: - fn: './ivar/trim/log/*.log' + fn: '*.ivar.log' mosdepth/global_dist: - fn: './mosdepth/genome/*.dist.txt' + fn: '*.global.dist.txt' varscan2/mpileup2cns: - fn: './varscan2/counts/lowfreq/*.log' + fn: '*.varscan2.log' cutadapt: - fn: './cutadapt/log/*.log' + fn: '*.cutadapt.log' kraken: - fn: './kraken2/*.txt' + fn: '*.kraken2.report.txt' picard/alignment_metrics: - fn: './picard/metrics/*.alignment_summary_metrics' + fn: '*.alignment_summary_metrics' picard/insertsize: - fn: './picard/metrics/*.insert_size_metrics' + fn: '*.insert_size_metrics' picard/markdups: - fn: './picard/metrics/*.MarkDuplicates.metrics.txt' + fn: '*.MarkDuplicates.metrics.txt' picard/wgs_metrics: - fn: './picard/metrics/*.coverage_metrics' + fn: '*.coverage_metrics' picard/basedistributionbycycle: skip: true picard/gcbias: From 7c70fa3b30d40fd2f41a1fdc192a67b64d9aa155 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 15:28:45 +0100 Subject: [PATCH 112/129] Add missing software versions --- bin/scrape_software_versions.py | 22 ++++++++++++++-------- main.nf | 11 +++++++---- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 574e6040..021d5d3c 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -7,28 +7,31 @@ 'nf-core/viralrecon': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], 'parallel-fastq-dump': ['v_parallel_fastq_dump.txt', r"parallel-fastq-dump\s:\s(\S+)"], + 'SRA-Tools': ['v_sratools.txt', r'"sra-pileup"\sversion\s(\S+)'], 'FastQC': ['v_fastqc.txt', r"FastQC\sv(\S+)"], 'fastp': 
['v_fastp.txt', r"fastp\s(\S+)"], 'Bowtie 2': ['v_bowtie2.txt', r"bowtie2-align-s\sversion\s(\S+)"], 'Samtools': ['v_samtools.txt', r"samtools\s(\S+)"], 'BEDTools': ['v_bedtools.txt', r"bedtools\sv(\S+)"], + 'Mosdepth': ['v_mosdepth.txt', r"mosdepth\s(\S+)"], 'Picard': ['v_picard.txt', r"\n(\S+)"], 'iVar': ['v_ivar.txt', r"iVar\sversion\s(\S+)"], 'VarScan 2': ['v_varscan.txt', r"VarScan\sv(\S+)"], + 'BCFTools': ['v_bcftools.txt', r"bcftools\s(\S+)"], 'SnpEff': ['v_snpeff.txt', r"SnpEff\s(\S+)"], 'SnpSift': ['v_snpsift.txt', r"SnpSift\sversion\s(\S+)"], - 'BCFTools': ['v_bcftools.txt', r"bcftools\s(\S+)"], + 'QUAST': ['v_quast.txt', r"QUAST\sv(\S+)"], 'Cutadapt': ['v_cutadapt.txt', r"(\S+)"], 'Kraken2': ['v_kraken2.txt', r"Kraken\sversion\s(\S+)"], 'SPAdes': ['v_spades.txt', r"SPAdes\sgenome\sassembler\sv(\S+)"], 'Unicycler': ['v_unicycler.txt', r"Unicycler\sv(\S+)"], 'minia': ['v_minia.txt', r"Minia\sversion\s(\S+)"], - 'Minimap2': ['v_minimap2.txt', r"(\S+)"], - 'vg': ['v_vg.txt', r"vg\sversion\sv(\S+)"], 'BLAST': ['v_blast.txt', r"blastn:\s(\S+)"], 'ABACAS': ['v_abacas.txt', r"ABACAS.(\S+)"], - 'QUAST': ['v_quast.txt', r"QUAST\sv(\S+)"], + 'plasmidID': ['v_plasmidid.txt', r"(\S+)"], 'Bandage': ['v_bandage.txt', r"Version:\s(\S+)"], + 'Minimap2': ['v_minimap2.txt', r"(\S+)"], + 'vg': ['v_vg.txt', r"vg\sversion\sv(\S+)"], 'R': ['v_R.txt', r"R\sversion\s(\S+)"], 'MultiQC': ['v_multiqc.txt', r"multiqc,\sversion\s(\S+)"] } @@ -36,28 +39,31 @@ results['nf-core/viralrecon'] = 'N/A' results['Nextflow'] = 'N/A' results['parallel-fastq-dump'] = 'N/A' +results['SRA-Tools'] = 'N/A' results['FastQC'] = 'N/A' results['fastp'] = 'N/A' results['Bowtie 2'] = 'N/A' results['Samtools'] = 'N/A' results['BEDTools'] = 'N/A' +results['Mosdepth'] = 'N/A' results['Picard'] = 'N/A' results['iVar'] = 'N/A' results['VarScan 2'] = 'N/A' +results['BCFTools'] = 'N/A' results['SnpEff'] = 'N/A' results['SnpSift'] = 'N/A' -results['BCFTools'] = 'N/A' +results['QUAST'] = 'N/A' results['Cutadapt'] 
= 'N/A' results['Kraken2'] = 'N/A' results['SPAdes'] = 'N/A' results['Unicycler'] = 'N/A' results['minia'] = 'N/A' -results['Minimap2'] = 'N/A' -results['vg'] = 'N/A' results['BLAST'] = 'N/A' results['ABACAS'] = 'N/A' -results['QUAST'] = 'N/A' +results['plasmidID'] = 'N/A' results['Bandage'] = 'N/A' +results['Minimap2'] = 'N/A' +results['vg'] = 'N/A' results['R'] = 'N/A' results['MultiQC'] = 'N/A' diff --git a/main.nf b/main.nf index f3c09576..91e63dc8 100644 --- a/main.nf +++ b/main.nf @@ -3216,28 +3216,31 @@ process get_software_versions { echo $workflow.manifest.version > v_pipeline.txt echo $workflow.nextflow.version > v_nextflow.txt parallel-fastq-dump --version > v_parallel_fastq_dump.txt + sra-pileup --version > v_sratools.txt fastqc --version > v_fastqc.txt fastp --version 2> v_fastp.txt bowtie2 --version > v_bowtie2.txt samtools --version > v_samtools.txt bedtools --version > v_bedtools.txt + mosdepth --version > v_mosdepth.txt picard CollectMultipleMetrics --version &> v_picard.txt || true ivar -v > v_ivar.txt echo \$(varscan 2>&1) > v_varscan.txt + bcftools -v > v_bcftools.txt snpEff -version > v_snpeff.txt echo \$(SnpSift 2>&1) > v_snpsift.txt - bcftools -v > v_bcftools.txt + quast.py --version > v_quast.txt cutadapt --version > v_cutadapt.txt kraken2 --version > v_kraken2.txt spades.py --version > v_spades.txt unicycler --version > v_unicycler.txt minia --version > v_minia.txt - minimap2 --version > v_minimap2.txt - vg version > v_vg.txt blastn -version > v_blast.txt abacas.pl -v &> v_abacas.txt || true - quast.py --version > v_quast.txt + plasmidID -v > v_plasmidid.txt || true Bandage --version > v_bandage.txt + minimap2 --version > v_minimap2.txt + vg version > v_vg.txt echo \$(R --version 2>&1) > v_R.txt multiqc --version > v_multiqc.txt scrape_software_versions.py &> software_versions_mqc.yaml From 9770398dea8653b7fe8677b63b7645d9f95fcda5 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 16:20:26 +0100 Subject: [PATCH 113/129] Update 
CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad6f8bb6..d9526cf8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). -## [1.1.0] - 2020-06-22 +## [1.1.0] - 2020-06-23 ### `Added` From 9ca019f1e0267d8d4118ffd5cab591560145c492 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 17:08:43 +0100 Subject: [PATCH 114/129] Add clustering --- bin/plot_mosdepth_regions.r | 40 ++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index d76b64b8..3ce79c02 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -135,29 +135,33 @@ for (sample in unique(dat$sample)) { if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { mat <- spread(dat[,c("sample", "region", "coverage")], sample, coverage, fill=NA, convert=FALSE) rownames(mat) <- mat[,1] - mat <- as.matrix(log10(mat[,-1] + 1)) + mat <- t(as.matrix(log10(mat[,-1] + 1))) heatmap <- Heatmap(mat, - name = "log10(Coverage+1)", - cluster_rows = FALSE, - cluster_columns = FALSE, - show_row_names = TRUE, - show_column_names = TRUE, - column_names_side = "bottom", - rect_gp = gpar(col="white", lwd=1), - show_heatmap_legend = TRUE, - heatmap_legend_param = list(title_gp=gpar(fontsize = 8), labels_gp=gpar(fontsize=6), direction="horizontal"), - row_names_gp = gpar(fontsize=6), - column_names_gp = gpar(fontsize=6), - height = unit(5, "mm")*nrow(mat), - width = unit(5, "mm")*ncol(mat), - col = viridis(50)) + column_title = "Heatmap to show amplicon coverage across multiple samples", + name = "log10(Coverage+1)", + cluster_rows = TRUE, + cluster_columns = FALSE, + show_row_names = TRUE, + show_column_names = TRUE, + column_title_side = "top", + 
column_names_side = "bottom", + row_names_side = "right", + rect_gp = gpar(col="white", lwd=1), + show_heatmap_legend = TRUE, + heatmap_legend_param = list(title_gp=gpar(fontsize=12, fontface="bold"), labels_gp=gpar(fontsize=10), direction="horizontal"), + column_title_gp = gpar(fontsize=14, fontface="bold"), + row_names_gp = gpar(fontsize=10, fontface="bold"), + column_names_gp = gpar(fontsize=10, fontface="bold"), + height = unit(5, "mm")*nrow(mat), + width = unit(5, "mm")*ncol(mat), + col = viridis(50)) ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions - height = 0.1969*nrow(mat) + 2 - width = 0.1969*ncol(mat) + (2*0.7) + height = 0.1969*nrow(mat) + (2*1.5) + width = 0.1969*ncol(mat) + (2*1.5) outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='') pdf(file=outfile, height=height, width=width) - draw(heatmap, heatmap_legend_side="top") + draw(heatmap, heatmap_legend_side="bottom") dev.off() } From d3a34c815c63c0ac488bc638035e6069fb03a948 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 18:24:03 +0100 Subject: [PATCH 115/129] Update paths --- main.nf | 56 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 14 deletions(-) diff --git a/main.nf b/main.nf index 91e63dc8..6cb43f15 100644 --- a/main.nf +++ b/main.nf @@ -1472,7 +1472,10 @@ process VARSCAN2_SNPEFF { */ process VARSCAN2_QUAST { label 'process_medium' - publishDir "${params.outdir}/variants/varscan2/quast", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/varscan2/quast", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_variants && 'varscan2' in callers && !params.skip_variants_quast @@ -1484,7 +1487,7 @@ process VARSCAN2_QUAST { output: path "AF${params.max_allele_freq}" - path 
"AF${params.max_allele_freq}/report.tsv" into ch_varscan2_quast_mqc + path "report.tsv" into ch_varscan2_quast_mqc script: features = params.gff ? "--features $gff" : "" @@ -1495,6 +1498,7 @@ process VARSCAN2_QUAST { $features \\ --threads $task.cpus \\ ${consensus.join(' ')} + ln -s AF${params.max_allele_freq}/report.tsv """ } @@ -1659,7 +1663,10 @@ process IVAR_SNPEFF { */ process IVAR_QUAST { label 'process_medium' - publishDir "${params.outdir}/variants/ivar/quast", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/ivar/quast", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_variants && 'ivar' in callers && !params.skip_variants_quast @@ -1671,7 +1678,7 @@ process IVAR_QUAST { output: path "AF${params.max_allele_freq}" - path "AF${params.max_allele_freq}/report.tsv" into ch_ivar_quast_mqc + path "report.tsv" into ch_ivar_quast_mqc script: features = params.gff ? "--features $gff" : "" @@ -1682,6 +1689,7 @@ process IVAR_QUAST { $features \\ --threads $task.cpus \\ ${consensus.join(' ')} + ln -s AF${params.max_allele_freq}/report.tsv """ } @@ -1824,7 +1832,10 @@ process BCFTOOLS_SNPEFF { */ process BCFTOOLS_QUAST { label 'process_medium' - publishDir "${params.outdir}/variants/bcftools", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/bcftools", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_variants && 'bcftools' in callers && !params.skip_variants_quast @@ -1836,7 +1847,7 @@ process BCFTOOLS_QUAST { output: path "quast" - path "quast/report.tsv" into ch_bcftools_quast_mqc + path "report.tsv" into ch_bcftools_quast_mqc script: features = params.gff ? 
"--features $gff" : "" @@ -1847,6 +1858,7 @@ process BCFTOOLS_QUAST { $features \\ --threads $task.cpus \\ ${consensus.join(' ')} + ln -s quast/report.tsv """ } @@ -2211,7 +2223,10 @@ process SPADES_PLASMIDID { process SPADES_QUAST { label 'process_medium' label 'error_ignore' - publishDir "${params.outdir}/assembly/spades", mode: params.publish_dir_mode + publishDir "${params.outdir}/assembly/spades", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_assembly && 'spades' in assemblers && !params.skip_assembly_quast @@ -2223,7 +2238,7 @@ process SPADES_QUAST { output: path "quast" - path "quast/report.tsv" into ch_quast_spades_mqc + path "report.tsv" into ch_quast_spades_mqc script: features = params.gff ? "--features $gff" : "" @@ -2234,6 +2249,7 @@ process SPADES_QUAST { $features \\ --threads $task.cpus \\ ${scaffolds.join(' ')} + ln -s quast/report.tsv """ } @@ -2493,7 +2509,10 @@ process METASPADES_PLASMIDID { process METASPADES_QUAST { label 'process_medium' label 'error_ignore' - publishDir "${params.outdir}/assembly/metaspades", mode: params.publish_dir_mode + publishDir "${params.outdir}/assembly/metaspades", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_assembly && 'metaspades' in assemblers && !single_end && !params.skip_assembly_quast @@ -2505,7 +2524,7 @@ process METASPADES_QUAST { output: path "quast" - path "quast/report.tsv" into ch_quast_metaspades_mqc + path "report.tsv" into ch_quast_metaspades_mqc script: features = params.gff ? 
"--features $gff" : "" @@ -2516,6 +2535,7 @@ process METASPADES_QUAST { $features \\ --threads $task.cpus \\ ${scaffolds.join(' ')} + ln -s quast/report.tsv """ } @@ -2773,7 +2793,10 @@ process UNICYCLER_PLASMIDID { process UNICYCLER_QUAST { label 'process_medium' label 'error_ignore' - publishDir "${params.outdir}/assembly/unicycler", mode: params.publish_dir_mode + publishDir "${params.outdir}/assembly/unicycler", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_assembly && 'unicycler' in assemblers && !params.skip_assembly_quast @@ -2785,7 +2808,7 @@ process UNICYCLER_QUAST { output: path "quast" - path "quast/report.tsv" into ch_quast_unicycler_mqc + path "report.tsv" into ch_quast_unicycler_mqc script: features = params.gff ? "--features $gff" : "" @@ -2796,6 +2819,7 @@ process UNICYCLER_QUAST { $features \\ --threads $task.cpus \\ ${scaffolds.join(' ')} + ln -s quast/report.tsv """ } @@ -3042,7 +3066,10 @@ process MINIA_PLASMIDID { process MINIA_QUAST { label 'process_medium' label 'error_ignore' - publishDir "${params.outdir}/assembly/minia/${params.minia_kmer}", mode: params.publish_dir_mode + publishDir "${params.outdir}/assembly/minia/${params.minia_kmer}", mode: params.publish_dir_mode, + saveAs: { filename -> + if (!filename.endsWith(".tsv")) filename + } when: !params.skip_assembly && 'minia' in assemblers && !params.skip_assembly_quast @@ -3054,7 +3081,7 @@ process MINIA_QUAST { output: path "quast" - path "quast/report.tsv" into ch_quast_minia_mqc + path "report.tsv" into ch_quast_minia_mqc script: features = params.gff ? 
"--features $gff" : "" @@ -3065,6 +3092,7 @@ process MINIA_QUAST { $features \\ --threads $task.cpus \\ ${scaffolds.join(' ')} + ln -s quast/report.tsv """ } From c67d09c421cf5303d7ffe42bb523eaf042d6047c Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 19:56:39 +0100 Subject: [PATCH 116/129] Update example multiqc report --- docs/html/multiqc_report.html | 175 ++++++++++------------------------ 1 file changed, 53 insertions(+), 122 deletions(-) diff --git a/docs/html/multiqc_report.html b/docs/html/multiqc_report.html index a0309e10..e8de5c3d 100644 --- a/docs/html/multiqc_report.html +++ b/docs/html/multiqc_report.html @@ -23,7 +23,7 @@ MultiQC Report - + @@ -5455,10 +5455,6 @@

    Alignment Summary -
  • - Base Distribution -
  • -
  • Insert Size
  • @@ -5467,14 +5463,6 @@

    Mark Duplicates -
  • - Mean Base Quality by Cycle -
  • - -
  • - Base Quality Distribution -
  • -
  • WGS Coverage
  • @@ -6447,12 +6435,12 @@

    JavaScript Disabled

    Report - generated on 2020-06-18, 13:38 + generated on 2020-06-22, 18:47 based on data in: - nfcore/viralrecon/test_full/work/c1/84674b294e014dce8501b3e9ec48ec

    + nfcore/viralrecon/test_full/work/fb/bfe7e69a95d108076e42773b526fd3

    @@ -6518,7 +6506,7 @@

    Variant calling metrics

    -
    Sample# Input reads# Trimmed reads (fastp)% Mapped reads (viral)# Trimmed reads (iVar)# Duplicate reads# Reads after MarkDuplicatesInsert size meanInsert size std devCoverage meanCoverage std dev% Coverage > 10x# High conf SNPs (VarScan 2)# High conf INDELs (VarScan 2)# High conf SNPs (iVar)# High conf INDELs (iVar)# High conf SNPs (BCFTools)# High conf INDELs (BCFTools)# Missense variants (VarScan 2)# Missense variants (iVar)# Missense variants (BCFTools)# Ns per 100kb consensus (VarScan 2)# Ns per 100kb consensus (iVar)# Ns per 100kb consensus (BCFTools)
    sample1
    2755026
    2384570
    100
    2371846
    2216597
    2371846
    523
    215
    1096
    479
    1
    6
    0
    6
    0
    6
    0
    2
    2
    6
    224
    167
    224
    sample2
    2139958
    1913910
    99
    1890837
    1816623
    1890837
    480
    177
    499
    312
    1
    6
    0
    7
    0
    7
    0
    4
    5
    5
    338
    292
    338
    + Sample# Input reads# Trimmed reads (fastp)% Mapped reads (viral)# Trimmed reads (iVar)# Duplicate reads# Reads after MarkDuplicatesInsert size meanInsert size std devCoverage meanCoverage std dev% Coverage > 10x# High conf SNPs (VarScan 2)# High conf INDELs (VarScan 2)# High conf SNPs (iVar)# High conf INDELs (iVar)# High conf SNPs (BCFTools)# High conf INDELs (BCFTools)# Missense variants (VarScan 2)# Missense variants (iVar)# Missense variants (BCFTools)# Ns per 100kb consensus (VarScan 2)# Ns per 100kb consensus (iVar)# Ns per 100kb consensus (BCFTools)sample1
    2755026
    2384570
    100
    2372162
    2216894
    2372162
    523
    216
    1095
    479
    1
    6
    0
    6
    0
    6
    0
    2
    2
    2
    224
    164
    224
    sample2
    2139958
    1913910
    99
    1891311
    1816848
    1891311
    478
    179
    498
    311
    1
    7
    0
    7
    0
    6
    0
    5
    5
    5
    338
    288
    338
    -

    Error - was not able to plot data.

    +
    + + +
    +
    loading..
    +

    @@ -13066,11 +12987,11 @@

    - - - + + +
    -
    loading..
    +
    loading..

    @@ -13141,10 +13062,10 @@

    - - + +
    -
    loading..
    +
    loading..

    @@ -13244,28 +13165,31 @@

    nf-core/viralrecon Software Versions

    nf-core/viralrecon
    v1.1.0
    Nextflow
    v20.01.0
    parallel-fastq-dump
    v0.6.6
    +
    SRA-Tools
    v2.10.7
    FastQC
    v0.11.9
    fastp
    v0.20.1
    Bowtie 2
    N/A
    Samtools
    v1.9
    BEDTools
    v2.29.2
    +
    Mosdepth
    v0.2.6
    Picard
    v2.23.0
    iVar
    v1.2.2
    VarScan 2
    v2.4.4
    +
    BCFTools
    v1.9
    SnpEff
    v4.5covid19
    SnpSift
    v4.3t
    -
    BCFTools
    v1.9
    +
    QUAST
    v5.0.2
    Cutadapt
    v2.10
    Kraken2
    v2.0.9-beta
    SPAdes
    v3.14.0
    Unicycler
    v0.4.7
    minia
    v3.2.4
    -
    Minimap2
    v2.17-r941
    -
    vg
    v1.24.0
    BLAST
    v2.9.0+
    ABACAS
    v1.3.1
    -
    QUAST
    v5.0.2
    +
    plasmidID
    v1.6.3
    Bandage
    v0.8.1
    +
    Minimap2
    v2.17-r941
    +
    vg
    v1.24.0
    R
    v3.6.2
    MultiQC
    v1.9
    @@ -13293,7 +13217,7 @@

    nf-core/viralrecon Workflow Summary

    -
    Run Name
    elated_kilby
    +
    Run Name
    distracted_yalow
    Samplesheet
    https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_amplicon.csv
    Protocol
    amplicon
    Amplicon Fasta File
    https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/amplicon/nCoV-2019.artic.V1.primer.fasta
    @@ -13303,16 +13227,23 @@

    nf-core/viralrecon Workflow Summary

    Viral Genome
    NC_045512.2
    Viral Fasta File
    https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.fna.gz
    Viral GFF
    https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/NC_045512.2/GCF_009858895.2_ASM985889v3_genomic.200409.gff.gz
    -
    Host Kraken2 DB
    https://zenodo.org/record/3738199/files/kraken2_human.tar.gz
    -
    Host Kraken2 Name
    human
    -
    Cut Mean Quality
    30
    -
    Qualified Phred
    30
    -
    Unqualified Perc Limit
    10
    -
    Min Trim Length
    50
    +
    Fastp Mean Qual
    30
    +
    Fastp Qual Phred
    30
    +
    Fastp Unqual % Limit
    10
    +
    Fastp Min Trim Length
    50
    Variant Calling Tools
    varscan2,ivar,bcftools
    +
    Min Mapped Reads
    1000
    +
    iVar Trim Min Len
    20
    +
    iVar Trim Min Qual
    20
    +
    iVar Trim Window
    4
    +
    Mpileup Depth
    N/A
    Min Base Quality
    20
    Min Read Depth
    10
    -
    Max Allele Freq
    0.8
    +
    Min Allele Freq
    0.25
    +
    Max Allele Freq
    0.75
    +
    Varscan2 Strand Filter
    Yes
    +
    Host Kraken2 DB
    https://zenodo.org/record/3738199/files/kraken2_human.tar.gz
    +
    Host Kraken2 Name
    human
    Assembly Tools
    spades,metaspades,unicycler,minia
    Minia Kmer Size
    31
    Max Resources
    224 GB memory, 32 cpus, 3d time per job
    From 90f4e8f3a4d0cf414a5607b5e2ee8b65e8c6197a Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 20:38:16 +0100 Subject: [PATCH 117/129] Add median --- bin/plot_mosdepth_regions.r | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r index 3ce79c02..195726f6 100755 --- a/bin/plot_mosdepth_regions.r +++ b/bin/plot_mosdepth_regions.r @@ -103,7 +103,7 @@ for (sample in unique(dat$sample)) { expand_limits(y=1) + ylab(bquote('log'[10]~'(Coverage+1)')) + xlab('Amplicon') + - ggtitle(paste(sample,'per amplicon coverage')) + ggtitle(paste(sample,'median coverage per amplicon')) outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='') ggsave(file=outfile, plot, height=3+(0.3*length(unique(sample_dat$region))), width=16, units="cm") @@ -137,7 +137,7 @@ if (ncol(dat) == 6 && length(INPUT_FILES) > 1) { rownames(mat) <- mat[,1] mat <- t(as.matrix(log10(mat[,-1] + 1))) heatmap <- Heatmap(mat, - column_title = "Heatmap to show amplicon coverage across multiple samples", + column_title = "Heatmap to show median amplicon coverage across samples", name = "log10(Coverage+1)", cluster_rows = TRUE, cluster_columns = FALSE, From b77378bbd1f8a1f982195ab2fab9feb9d6ca9f4f Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 20:38:48 +0100 Subject: [PATCH 118/129] Add median flag to mosdepth --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 6cb43f15..62a65a52 100644 --- a/main.nf +++ b/main.nf @@ -1221,6 +1221,7 @@ if (params.protocol == 'amplicon') { mosdepth \\ --by amplicon.collapsed.bed \\ --fast-mode \\ + --use-median \\ --thresholds 0,1,10,50,100,500 \\ ${prefix} \\ ${bam[0]} From 7fbdfc49a29ee51e1a57f14335b968c95f94d71e Mon Sep 17 00:00:00 2001 From: drpatelh Date: Mon, 22 Jun 2020 22:42:17 +0100 Subject: [PATCH 119/129] Fix config --- assets/multiqc_config.yaml | 4 ++-- main.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) 
diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 4d2a0664..65abc05f 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -160,6 +160,8 @@ module_order: - kraken: name: 'ASSEMBLY: Kraken 2' info: 'This section of the report shows Kraken 2 classification results for reads after primer sequence trimming with Cutadapt.' + path_filters: + - './kraken2/*.txt' - quast: name: 'ASSEMBLY: QUAST (SPAdes)' anchor: 'quast_spades' @@ -283,8 +285,6 @@ sp: fn: '*.varscan2.log' cutadapt: fn: '*.cutadapt.log' - kraken: - fn: '*.kraken2.report.txt' picard/alignment_metrics: fn: '*.alignment_summary_metrics' picard/insertsize: diff --git a/main.nf b/main.nf index 62a65a52..a81b6032 100644 --- a/main.nf +++ b/main.nf @@ -955,7 +955,7 @@ def check_mapped(sample,flagstat,min_mapped_reads=500) { c_green = params.monochrome_logs ? '' : "\033[0;32m"; c_red = params.monochrome_logs ? '' : "\033[0;31m"; if (mapped < min_mapped_reads.toInteger()) { - log.info "${c_red}>>>> $sample FAILED MAPPED READ THRESHOLD: ${mapped} < ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! <<<<${c_reset}" + log.info ">${c_red}>>>> $sample FAILED MAPPED READ THRESHOLD: ${mapped} < ${params.min_mapped_reads}. IGNORING FOR FURTHER DOWNSTREAM ANALYSIS! 
<<<<${c_reset}<" fail_mapped_reads[sample] = mapped return false } else { From 55945adf6e969b56f3476a2a313f389a2ec4ec43 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 23 Jun 2020 07:36:22 +0100 Subject: [PATCH 120/129] Update main.nf --- main.nf | 1 - 1 file changed, 1 deletion(-) diff --git a/main.nf b/main.nf index a81b6032..0b5caea7 100644 --- a/main.nf +++ b/main.nf @@ -3245,7 +3245,6 @@ process get_software_versions { echo $workflow.manifest.version > v_pipeline.txt echo $workflow.nextflow.version > v_nextflow.txt parallel-fastq-dump --version > v_parallel_fastq_dump.txt - sra-pileup --version > v_sratools.txt fastqc --version > v_fastqc.txt fastp --version 2> v_fastp.txt bowtie2 --version > v_bowtie2.txt From c9d40fb3c3f0bf413d593d43c6483f0eca234249 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 08:14:28 +0100 Subject: [PATCH 121/129] Minor fixes --- .github/workflows/awstest.yml | 1 + assets/multiqc_config.yaml | 2 -- bin/scrape_software_versions.py | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 84f27c4d..ce8fd3b6 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -6,6 +6,7 @@ on: push: branches: - master + - dev release: types: [published] diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml index 65abc05f..681ce675 100644 --- a/assets/multiqc_config.yaml +++ b/assets/multiqc_config.yaml @@ -160,8 +160,6 @@ module_order: - kraken: name: 'ASSEMBLY: Kraken 2' info: 'This section of the report shows Kraken 2 classification results for reads after primer sequence trimming with Cutadapt.' 
- path_filters: - - './kraken2/*.txt' - quast: name: 'ASSEMBLY: QUAST (SPAdes)' anchor: 'quast_spades' diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py index 021d5d3c..adeaa10e 100755 --- a/bin/scrape_software_versions.py +++ b/bin/scrape_software_versions.py @@ -7,7 +7,6 @@ 'nf-core/viralrecon': ['v_pipeline.txt', r"(\S+)"], 'Nextflow': ['v_nextflow.txt', r"(\S+)"], 'parallel-fastq-dump': ['v_parallel_fastq_dump.txt', r"parallel-fastq-dump\s:\s(\S+)"], - 'SRA-Tools': ['v_sratools.txt', r'"sra-pileup"\sversion\s(\S+)'], 'FastQC': ['v_fastqc.txt', r"FastQC\sv(\S+)"], 'fastp': ['v_fastp.txt', r"fastp\s(\S+)"], 'Bowtie 2': ['v_bowtie2.txt', r"bowtie2-align-s\sversion\s(\S+)"], @@ -39,7 +38,6 @@ results['nf-core/viralrecon'] = 'N/A' results['Nextflow'] = 'N/A' results['parallel-fastq-dump'] = 'N/A' -results['SRA-Tools'] = 'N/A' results['FastQC'] = 'N/A' results['fastp'] = 'N/A' results['Bowtie 2'] = 'N/A' From 06d8c1fdab38b3e4b56231ae3d1a97e644013710 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 11:13:28 +0100 Subject: [PATCH 122/129] Add biostrings --- CHANGELOG.md | 1 + environment.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d9526cf8..2ac451c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
* Add mosdepth `0.2.6` * Add bioconductor-complexheatmap `2.2.0` +* Add bioconductor-biostrings `2.54.0` * Add r-optparse `1.6.6` * Add r-tidyr `1.1.0` * Add r-tidyverse `1.3.0` diff --git a/environment.yml b/environment.yml index 8edb5f53..292345b2 100644 --- a/environment.yml +++ b/environment.yml @@ -42,6 +42,7 @@ dependencies: - bioconda::snpeff=4.5covid19 - bioconda::snpsift=4.3.1t - bioconda::bioconductor-complexheatmap=2.2.0 + - bioconda::bioconductor-biostrings=2.54.0 ## assembly - bioconda::cutadapt=2.10 From 0a7325e0a42b5f06afcb622c74962a3899157eca Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 11:13:51 +0100 Subject: [PATCH 123/129] Test for single caller and assembler --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 36f2f8c2..737a8f85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -61,7 +61,7 @@ jobs: NXF_ANSI_LOG: false strategy: matrix: - parameters: [--skip_adapter_trimming, --skip_markduplicates, --skip_variants, --skip_amplicon_trimming, --skip_kraken2, --skip_assembly] + parameters: [--skip_adapter_trimming, --skip_markduplicates, --skip_variants, --skip_amplicon_trimming, --skip_kraken2, --skip_assembly, '--callers ivar --assemblers spades'] steps: - name: Check out pipeline code uses: actions/checkout@v2 From bebfbf35684a28ebf000fb70250dfe08b8f0a740 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 13:26:37 +0100 Subject: [PATCH 124/129] Initial commit --- bin/plot_base_density.r | 176 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 176 insertions(+) create mode 100755 bin/plot_base_density.r diff --git a/bin/plot_base_density.r b/bin/plot_base_density.r new file mode 100755 index 00000000..e62eb647 --- /dev/null +++ b/bin/plot_base_density.r @@ -0,0 +1,176 @@ +#!/usr/bin/env Rscript + +################################################ 
+################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(ggplot2) +library(scales) +library(reshape2) +library(Biostrings) + +################################################ +################################################ +## VALIDATE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--fasta_files"), type="character", default=NULL, help="Comma-separated list of fasta files", metavar="fasta_files"), + make_option(c("-s", "--prefixes"), type="character", default=NULL, help="Comma-separated list of prefixes associated with fasta files to add to plots. Must be unique and in same order as fasta file input.", metavar="prefixes"), + make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path")) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +## Check input files +INPUT_FILES <- unique(unlist(strsplit(opt$fasta_files,","))) +if (length(INPUT_FILES) == 0) { + print_help(opt_parser) + stop("At least one input file must be supplied", call.=FALSE) +} +if (!all(file.exists(INPUT_FILES))) { + stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE) +} + +## Check prefixes for input files +PREFIXES <- basename(INPUT_FILES) +if (!is.null(opt$prefixes)){ + PREFIXES <- unique(unlist(strsplit(opt$prefixes,","))) + if (length(INPUT_FILES) != length(PREFIXES)) { + print_help(opt_parser) + stop("Please provide a unique prefix for each fasta file.", call.=FALSE) + } +} + +## Check the output directory has a trailing slash, if not add one +OUTDIR <- opt$output_dir +if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") { + OUTDIR <- paste(OUTDIR,"/",sep='') +} +## 
Create the directory if it doesn't already exist. +if (!file.exists(OUTDIR)) { + dir.create(OUTDIR, recursive=TRUE) +} + +################################################ +################################################ +## READ IN DATA ## +################################################ +################################################ + +dat <- NULL +for (input_file in INPUT_FILES) { + dat <- c(dat,readDNAStringSet(input_file)[1]) +} + +################################################ +################################################ +## PLOTS ## +################################################ +################################################ + +bases_std <- c("A","C","T","G") +base_cols <- c("A" = "#009E73", + "C" = "#0072B2", + "T" = "#D55E00", + "G" = "#000000", + "N" = "#E69F00", + "X" = "#999999") + +for (idx in 1:length(dat)) { + + ## Table of base counts + base_seq <- strsplit(toString(dat[[idx]]), "")[[1]] + base_tab <- data.frame(table(base_seq), stringsAsFactors=FALSE) + colnames(base_tab) <- c("base","freq") + rownames(base_tab) <- base_tab$base + for (base in 1:length(bases_std)) { + if (!any(base_tab$base %in% bases_std[base])) { + base_tab <- rbind(base_tab,c(bases_std[base],0)) + } + } + base_tab$perc <- 100 *base_tab$freq / sum(base_tab$freq) + base_tab <- base_tab[order(base_tab$base, decreasing=FALSE),] + base_tab <- rbind(base_tab[c(bases_std, "N"),], base_tab[!rownames(base_tab) %in% c(bases_std, "N"),]) + base_tab$base <- factor(base_tab$base, levels=rownames(base_tab)) + outfile <- paste(OUTDIR,PREFIXES[idx],".base_counts.txt", sep='') + write.table(base_tab, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) + + ## Barplot of base frequencies + barplot <- ggplot(base_tab, aes(x=base,y=perc)) + + geom_bar(stat="identity") + + theme_classic() + + scale_y_continuous(limits=c(0,100),breaks=c(0,25,50,75,100)) + + ylab("% Observed") + + xlab("Base") + + ggtitle(PREFIXES[idx]) + outfile <- 
paste(OUTDIR,PREFIXES[idx],".base_counts.pdf", sep='') + ggsave(file=outfile, barplot, width=12, height=10, units="cm") + + ## Create a data frame of base coverage + bases <- unique(c(bases_std,"N",unique(base_seq))) + base_dat <- data.frame(sample=names(dat[[idx]])[1], position=1:length(base_seq), stringsAsFactors=FALSE) + for (base in 1:length(bases)) { + base_dat[,bases[base]] <- as.numeric(base_seq==bases[base]) + } + + ## Stretches of N's + N_rle <- Rle(base_dat[,"N"]) + N_dat <- data.frame(start=cumsum(runLength(N_rle))[runValue(N_rle)==1], width=runLength(N_rle)[runValue(N_rle)==1]) + outfile <- paste(OUTDIR,PREFIXES[idx],".N_run.txt", sep='') + write.table(N_dat, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) + + ## Boxplot of N frequencies + boxplot <- ggplot(N_dat,aes(x="", y=width)) + + geom_boxplot() + + geom_jitter() + + theme_classic() + + theme(panel.border=element_rect(colour="black", fill=NA, size=1)) + + ylab("Stretch of consecutive Ns") + + xlab("") + + ggtitle(PREFIXES[idx]) + outfile <- paste(OUTDIR,PREFIXES[idx], ".N_run.pdf", sep='') + ggsave(file=outfile, boxplot, width=8, height=10, units="cm") + + ## Running mean of bp density for standard bases + run_k <- 1001 + run_dat <- base_dat[,c("sample", "position", bases_std)] + for (base in bases_std) { + run_dat[,base] <- as.numeric(runmean(Rle(base_dat[,base]), k=run_k, endrule="constant")) + } + run_dat <- melt(run_dat, c(1,2)) + colnames(run_dat)[3] <- "base" + run_dat$position <- run_dat$position/1000 + lineplot <- ggplot(run_dat,aes(x=position, y=value, colour=base)) + + geom_line() + + theme_classic() + + theme(panel.border=element_rect(colour="black", fill=NA, size=1)) + + scale_y_continuous(breaks=c(0,1), labels=c(0,1)) + + xlab("Position (Kb)") + + ylab(paste("Base density (running mean k=",run_k,")", sep='')) + + ggtitle(PREFIXES[idx]) + + scale_colour_manual(values=base_cols) + outfile <- paste(OUTDIR,PREFIXES[idx], ".ACTG_density.pdf", sep='') + 
ggsave(file=outfile, lineplot, width=18, height=10, units="cm") + + ## Single base density plots, nucleotide resolution. + bases_other <- bases[!bases %in% bases_std] + for (obase in bases_other) { + plot_dat <- base_dat[,c("sample", "position", obase)] + colnames(plot_dat)[3] <- "base" + plot_col <- ifelse(obase=="N", base_cols[["N"]], base_cols[["X"]]) + lineplot <- ggplot(plot_dat, aes(x=position/1000, y=base)) + + geom_line(colour=plot_col) + + theme_classic() + + theme(legend.position="none", panel.border=element_rect(colour="black", fill=NA, size=1)) + + scale_y_continuous(breaks=c(0,1), labels=c(0,1)) + + xlab("Position (Kb)") + + ylab(paste(obase,"density", sep=' ')) + + ggtitle(PREFIXES[idx]) + outfile <- paste(OUTDIR,PREFIXES[idx], ".", obase, "_density.pdf", sep='') + ggsave(file=outfile, lineplot, width=18, height=10, units="cm") + } +} From f29f98ad35e2352d08690fab9de0c48f81f633aa Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 14:06:18 +0100 Subject: [PATCH 125/129] Update code --- bin/plot_base_density.r | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/bin/plot_base_density.r b/bin/plot_base_density.r index e62eb647..4406b8c5 100755 --- a/bin/plot_base_density.r +++ b/bin/plot_base_density.r @@ -96,7 +96,7 @@ for (idx in 1:length(dat)) { base_tab <- base_tab[order(base_tab$base, decreasing=FALSE),] base_tab <- rbind(base_tab[c(bases_std, "N"),], base_tab[!rownames(base_tab) %in% c(bases_std, "N"),]) base_tab$base <- factor(base_tab$base, levels=rownames(base_tab)) - outfile <- paste(OUTDIR,PREFIXES[idx],".base_counts.txt", sep='') + outfile <- paste(OUTDIR, PREFIXES[idx], ".base_counts.tsv", sep='') write.table(base_tab, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) ## Barplot of base frequencies @@ -107,7 +107,7 @@ for (idx in 1:length(dat)) { ylab("% Observed") + xlab("Base") + ggtitle(PREFIXES[idx]) - outfile <- paste(OUTDIR,PREFIXES[idx],".base_counts.pdf", sep='') 
+ outfile <- paste(OUTDIR, PREFIXES[idx], ".base_counts.pdf", sep='') ggsave(file=outfile, barplot, width=12, height=10, units="cm") ## Create a data frame of base coverage @@ -120,21 +120,9 @@ for (idx in 1:length(dat)) { ## Stretches of N's N_rle <- Rle(base_dat[,"N"]) N_dat <- data.frame(start=cumsum(runLength(N_rle))[runValue(N_rle)==1], width=runLength(N_rle)[runValue(N_rle)==1]) - outfile <- paste(OUTDIR,PREFIXES[idx],".N_run.txt", sep='') + outfile <- paste(OUTDIR, PREFIXES[idx], ".N_run.tsv", sep='') write.table(N_dat, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE) - ## Boxplot of N frequencies - boxplot <- ggplot(N_dat,aes(x="", y=width)) + - geom_boxplot() + - geom_jitter() + - theme_classic() + - theme(panel.border=element_rect(colour="black", fill=NA, size=1)) + - ylab("Stretch of consecutive Ns") + - xlab("") + - ggtitle(PREFIXES[idx]) - outfile <- paste(OUTDIR,PREFIXES[idx], ".N_run.pdf", sep='') - ggsave(file=outfile, boxplot, width=8, height=10, units="cm") - ## Running mean of bp density for standard bases run_k <- 1001 run_dat <- base_dat[,c("sample", "position", bases_std)] @@ -148,12 +136,12 @@ for (idx in 1:length(dat)) { geom_line() + theme_classic() + theme(panel.border=element_rect(colour="black", fill=NA, size=1)) + - scale_y_continuous(breaks=c(0,1), labels=c(0,1)) + + scale_y_continuous(breaks=c(0,0.25,0.50,0.75,1)) + xlab("Position (Kb)") + ylab(paste("Base density (running mean k=",run_k,")", sep='')) + ggtitle(PREFIXES[idx]) + scale_colour_manual(values=base_cols) - outfile <- paste(OUTDIR,PREFIXES[idx], ".ACTG_density.pdf", sep='') + outfile <- paste(OUTDIR, PREFIXES[idx], ".ACTG_density.pdf", sep='') ggsave(file=outfile, lineplot, width=18, height=10, units="cm") ## Single base density plots, nucleotide resolution. 
@@ -170,7 +158,7 @@ for (idx in 1:length(dat)) { xlab("Position (Kb)") + ylab(paste(obase,"density", sep=' ')) + ggtitle(PREFIXES[idx]) - outfile <- paste(OUTDIR,PREFIXES[idx], ".", obase, "_density.pdf", sep='') + outfile <- paste(OUTDIR, PREFIXES[idx], ".", obase, "_density.pdf", sep='') ggsave(file=outfile, lineplot, width=18, height=10, units="cm") } } From 366ef1f69c215784fa767985ee7bf84c2889b503 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 14:06:32 +0100 Subject: [PATCH 126/129] Plot consensus base density --- main.nf | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index 0b5caea7..3c8988d0 100644 --- a/main.nf +++ b/main.nf @@ -1363,7 +1363,12 @@ process VARSCAN2 { process VARSCAN2_CONSENSUS { tag "$sample" label 'process_medium' - publishDir "${params.outdir}/variants/varscan2/consensus", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/varscan2/consensus", mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.endsWith(".tsv")) "base_qc/$filename" + else if (filename.endsWith(".pdf")) "base_qc/$filename" + else filename + } when: !params.skip_variants && 'varscan2' in callers @@ -1374,7 +1379,7 @@ process VARSCAN2_CONSENSUS { output: tuple val(sample), val(single_end), path("*consensus.masked.fa") into ch_varscan2_consensus - path "*consensus.fa" + path "*.{consensus.fa,tsv,pdf}" script: prefix = "${sample}.AF${params.max_allele_freq}" @@ -1393,6 +1398,8 @@ process VARSCAN2_CONSENSUS { -fo ${prefix}.consensus.masked.fa header=\$(head -n 1 ${prefix}.consensus.masked.fa | sed 's/>//g') sed -i "s/\${header}/${sample}/g" ${prefix}.consensus.masked.fa + + plot_base_density.r --fasta_files ${prefix}.consensus.masked.fa --prefixes $sample --output_dir ./ """ } @@ -1565,7 +1572,12 @@ process IVAR_VARIANTS { process IVAR_CONSENSUS { tag "$sample" label 'process_medium' - publishDir "${params.outdir}/variants/ivar/consensus", mode: 
params.publish_dir_mode + publishDir "${params.outdir}/variants/ivar/consensus", mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.endsWith(".tsv")) "base_qc/$filename" + else if (filename.endsWith(".pdf")) "base_qc/$filename" + else filename + } when: !params.skip_variants && 'ivar' in callers @@ -1576,7 +1588,7 @@ process IVAR_CONSENSUS { output: tuple val(sample), val(single_end), path("*.fa") into ch_ivar_consensus - path "*.txt" + path "*.{txt,tsv,pdf}" script: prefix = "${sample}.AF${params.max_allele_freq}" @@ -1584,6 +1596,8 @@ process IVAR_CONSENSUS { cat $mpileup | ivar consensus -q $params.min_base_qual -t $params.max_allele_freq -m $params.min_coverage -n N -p ${prefix}.consensus header=\$(head -n1 ${prefix}.consensus.fa | sed 's/>//g') sed -i "s/\${header}/${sample}/g" ${prefix}.consensus.fa + + plot_base_density.r --fasta_files ${prefix}.consensus.fa --prefixes $sample --output_dir ./ """ } @@ -1748,7 +1762,12 @@ process BCFTOOLS_VARIANTS { process BCFTOOLS_CONSENSUS { tag "$sample" label 'process_medium' - publishDir "${params.outdir}/variants/bcftools/consensus", mode: params.publish_dir_mode + publishDir "${params.outdir}/variants/bcftools/consensus", mode: params.publish_dir_mode, + saveAs: { filename -> + if (filename.endsWith(".tsv")) "base_qc/$filename" + else if (filename.endsWith(".pdf")) "base_qc/$filename" + else filename + } when: !params.skip_variants && 'bcftools' in callers @@ -1759,7 +1778,7 @@ process BCFTOOLS_CONSENSUS { output: tuple val(sample), val(single_end), path("*consensus.masked.fa") into ch_bcftools_consensus_masked - path "*consensus.fa" + path "*.{consensus.fa,tsv,pdf}" script: """ @@ -1778,6 +1797,8 @@ process BCFTOOLS_CONSENSUS { sed -i 's/${index_base}/${sample}/g' ${sample}.consensus.masked.fa header=\$(head -n1 ${sample}.consensus.masked.fa | sed 's/>//g') sed -i "s/\${header}/${sample}/g" ${sample}.consensus.masked.fa + + plot_base_density.r --fasta_files ${sample}.consensus.masked.fa --prefixes 
$sample --output_dir ./ """ } From b946c756b98e19029db0049fb67c3fa6625f2f41 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 14:11:38 +0100 Subject: [PATCH 127/129] Remove dev --- .github/workflows/awstest.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index ce8fd3b6..84f27c4d 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -6,7 +6,6 @@ on: push: branches: - master - - dev release: types: [published] From ce78d30a0496871dc5b5098e5d561448b21dc8d3 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 14:41:45 +0100 Subject: [PATCH 128/129] Update docs --- docs/output.md | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/output.md b/docs/output.md index 3b7bb809..30ca09dc 100644 --- a/docs/output.md +++ b/docs/output.md @@ -271,11 +271,17 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/varscan2/consensus/` * `.AF.consensus.fa`: Consensus Fasta file generated by integrating the high frequency variants called by VarScan into the reference genome. * `.AF.consensus.masked.fa`: Masked consensus Fasta file. -* `variants/varscan2/log/` - * `.varscan2.log`: Log file generated from stderr by VarScan 2. +* `variants/varscan2/consensus/base_qc/` + * `.AF.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `.AF.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `.AF.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `.AF.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `.AF.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. * `variants/varscan2/bcftools_stats/` * `.bcftools_stats.txt`: Statistics and counts obtained from low frequency variants VCF file. 
* `.AF.bcftools_stats.txt`: Statistics and counts obtained from high frequency variants VCF file. +* `variants/varscan2/log/` + * `.varscan2.log`: Log file generated from stderr by VarScan 2. * `variants/bam/mpileup/` * `..mpileup`: mpileup files summarize all the data from aligned reads at a given genomic position. Each row of the mpileup file gives similar information to a single vertical column of reads as visualised in IGV. @@ -302,6 +308,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/ivar/consensus/` * `.AF.consensus.fa`: Consensus Fasta file generated by iVar at the frequency threshold set by the `--max_allele_freq` parameter. * `.AF.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. +* `variants/ivar/consensus/base_qc/` + * `.AF.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + * `.AF.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `.AF.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `.AF.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `.AF.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. * `variants/ivar/log/` * `.variant.counts.log`: Variant counts for low frequency variants. * `.AF.variant.counts.log`: Variant counts for high frequency variants. @@ -326,6 +338,12 @@ Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method- * `variants/bcftools/consensus/` * `.consensus.fa`: Consensus Fasta file generated by integrating the variants called by BCFTools into the reference genome. * `.consensus.masked.fa`: Masked consensus Fasta file. +* `variants/bcftools/consensus/base_qc/` + * `.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. 
+ * `.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + * `.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + * `.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + * `.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. * `variants/bcftools/bcftools_stats/` * `.bcftools_stats.txt`: Statistics and counts obtained from VCF file. From 40b6eaacc2b05e44eca830b5e515a907162bb978 Mon Sep 17 00:00:00 2001 From: drpatelh Date: Tue, 23 Jun 2020 14:41:52 +0100 Subject: [PATCH 129/129] Fix naming --- main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/main.nf b/main.nf index 3c8988d0..73b4d1c8 100644 --- a/main.nf +++ b/main.nf @@ -1399,7 +1399,7 @@ process VARSCAN2_CONSENSUS { header=\$(head -n 1 ${prefix}.consensus.masked.fa | sed 's/>//g') sed -i "s/\${header}/${sample}/g" ${prefix}.consensus.masked.fa - plot_base_density.r --fasta_files ${prefix}.consensus.masked.fa --prefixes $sample --output_dir ./ + plot_base_density.r --fasta_files ${prefix}.consensus.masked.fa --prefixes $prefix --output_dir ./ """ } @@ -1597,7 +1597,7 @@ process IVAR_CONSENSUS { header=\$(head -n1 ${prefix}.consensus.fa | sed 's/>//g') sed -i "s/\${header}/${sample}/g" ${prefix}.consensus.fa - plot_base_density.r --fasta_files ${prefix}.consensus.fa --prefixes $sample --output_dir ./ + plot_base_density.r --fasta_files ${prefix}.consensus.fa --prefixes $prefix --output_dir ./ """ }