diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..ea27a584
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,27 @@
+{
+    "name": "nfcore",
+    "image": "nfcore/gitpod:latest",
+    "remoteUser": "gitpod",
+
+    // Configure tool-specific properties.
+    "customizations": {
+        // Configure properties specific to VS Code.
+        "vscode": {
+            // Set *default* container specific settings.json values on container create.
+            "settings": {
+                "python.defaultInterpreterPath": "/opt/conda/bin/python",
+                "python.linting.enabled": true,
+                "python.linting.pylintEnabled": true,
+                "python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
+                "python.formatting.yapfPath": "/opt/conda/bin/yapf",
+                "python.linting.flake8Path": "/opt/conda/bin/flake8",
+                "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
+                "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
+                "python.linting.pylintPath": "/opt/conda/bin/pylint"
+            },
+
+            // Add the IDs of extensions you want installed when the container is created.
+            "extensions": ["ms-python.python", "ms-python.vscode-pylance", "nf-core.nf-core-extensionpack"]
+        }
+    }
+}
diff --git a/.editorconfig b/.editorconfig
index b6b31907..b78de6e6 100644
--- a/.editorconfig
+++ b/.editorconfig
@@ -8,7 +8,7 @@ trim_trailing_whitespace = true
 indent_size = 4
 indent_style = space
 
-[*.{md,yml,yaml,html,css,scss,js}]
+[*.{md,yml,yaml,html,css,scss,js,cff}]
 indent_size = 2
 
 # These files are edited and tested upstream in nf-core/modules
diff --git a/.gitattributes b/.gitattributes
index 050bb120..7a2dabc2 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,4 @@
 *.config linguist-language=nextflow
+*.nf.test linguist-language=nextflow
 modules/nf-core/** linguist-generated
 subworkflows/nf-core/** linguist-generated
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index 17e6615b..98e5be93 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -101,3 +101,19 @@ If you are using a new feature from core Nextflow, you may bump the minimum requ
 ### Images and figures
 
 For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines).
+
+## GitHub Codespaces
+
+This repo includes a devcontainer configuration which will create a GitHub Codespace for Nextflow development! This is an online developer environment that runs in your browser, complete with VSCode and a terminal.
+
+To get started:
+
+- Open the repo in [Codespaces](https://github.com/nf-core/viralrecon/codespaces)
+- Tools installed
+  - nf-core
+  - Nextflow
+
+Devcontainer specs:
+
+- [DevContainer config](.devcontainer/devcontainer.json)
+- [Dockerfile](.devcontainer/Dockerfile)
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index c8de4b1e..8ccb624a 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -42,7 +42,7 @@ body:
     attributes:
       label: System information
      description: |
-        * Nextflow version _(eg. 21.10.3)_
+        * Nextflow version _(eg. 22.10.1)_
         * Hardware _(eg. HPC, Desktop, Cloud)_
         * Executor _(eg. slurm, local, awsbatch)_
         * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 3ff7d7ac..41f36782 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -15,8 +15,8 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vira
 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md)
+- [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
 - [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 5600fcf9..5ab09755 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -29,3 +29,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}/platform_${{ matrix.platform }}"
             }
           profiles: test_full_${{ matrix.platform }},aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index a3ce4a0a..f6f7af30 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -23,3 +23,7 @@ jobs:
               "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-test-${{ github.sha }}"
             }
           profiles: test,aws_tower
+      - uses: actions/upload-artifact@v3
+        with:
+          name: Tower debug log file
+          path: tower_action_*.log
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a47ef384..8770fc54 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,7 +10,10 @@ on:
 
 env:
   NXF_ANSI_LOG: false
-  CAPSULE_LOG: none
+
+concurrency:
+  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}"
+  cancel-in-progress: true
 
 jobs:
   test:
@@ -20,27 +23,17 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        # Nextflow versions
-        include:
-          # Test pipeline minimum Nextflow version
-          - NXF_VER: "21.10.3"
-            NXF_EDGE: ""
-          # Test latest edge release of Nextflow
-          - NXF_VER: ""
-            NXF_EDGE: "1"
+        NXF_VER:
+          - "22.10.1"
+          - "latest-everything"
     steps:
       - name: Check out pipeline code
-        uses: actions/checkout@v2
+        uses: actions/checkout@v3
 
       - name: Install Nextflow
-        env:
-          NXF_VER: ${{ matrix.NXF_VER }}
-          # Uncomment only if the edge release is more recent than the latest stable release
-          # See https://github.com/nextflow-io/nextflow/issues/2467
-          # NXF_EDGE: ${{ matrix.NXF_EDGE }}
-        run: |
-          wget -qO- get.nextflow.io | bash
-          sudo mv nextflow /usr/local/bin/
+        uses: nf-core/setup-nextflow@v1
+        with:
+          version: "${{ matrix.NXF_VER }}"
 
       - name: Run pipeline with test data
         run: |
diff --git a/.github/workflows/fix-linting.yml
b/.github/workflows/fix-linting.yml index d1fb62d0..d1e6e5de 100644 --- a/.github/workflows/fix-linting.yml +++ b/.github/workflows/fix-linting.yml @@ -24,7 +24,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @prettier/plugin-php @@ -34,9 +34,9 @@ jobs: id: prettier_status run: | if prettier --check ${GITHUB_WORKSPACE}; then - echo "::set-output name=result::pass" + echo "result=pass" >> $GITHUB_OUTPUT else - echo "::set-output name=result::fail" + echo "result=fail" >> $GITHUB_OUTPUT fi - name: Run 'prettier --write' diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 77358dee..858d622e 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -4,6 +4,8 @@ name: nf-core linting # that the code meets the nf-core guidelines. on: push: + branches: + - dev pull_request: release: types: [published] @@ -12,9 +14,9 @@ jobs: EditorConfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -25,9 +27,9 @@ jobs: Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v3 - name: Install Prettier run: npm install -g prettier @@ -35,22 +37,48 @@ jobs: - name: Run Prettier --check run: prettier --check ${GITHUB_WORKSPACE} + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. + To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! 
+ repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Nextflow - env: - CAPSULE_LOG: none - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v3 + - uses: actions/setup-python@v4 with: - python-version: "3.6" + python-version: "3.7" architecture: "x64" - name: Install dependencies @@ -71,7 +99,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 04758f61..0bbcd30f 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -18,7 +18,7 @@ jobs: - name: Get PR number id: pr_number - run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment uses: marocchino/sticky-pull-request-comment@v2 diff --git a/.nf-core.yml b/.nf-core.yml index 192f7e02..40bcac74 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,7 +4,3 @@ lint: - assets/email_template.html - assets/email_template.txt - lib/NfcoreTemplate.groovy - files_exist: - - assets/multiqc_config.yml - - conf/igenomes.config - - lib/WorkflowViralrecon.groovy diff --git a/.prettierignore b/.prettierignore index d0e7ae58..437d763d 100644 --- a/.prettierignore +++ b/.prettierignore @@ -1,4 +1,6 @@ email_template.html +adaptivecard.json +slackreport.json .nextflow* work/ data/ @@ -7,3 +9,4 @@ results/ testing/ testing* *.pyc +bin/ diff --git a/CHANGELOG.md b/CHANGELOG.md index a75796a4..c751598a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,75 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 + +### Credits + +Special thanks to the following for their code contributions to the release: + +- [Friederike Hanssen](https://github.com/FriederikeHanssen) +- [Hugo Tavares](https://github.com/tavareshugo) +- [James Fellows Yates](https://github.com/jfy133) +- [Jessica Wu](https://github.com/wutron) +- [Matthew Wells](https://github.com/mattheww95) +- [Maxime Garcia](https://github.com/maxulysse) +- [Phil Ewels](https://github.com/ewels) +- [Sara Monzón](https://github.com/saramonzon) + +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. 
+
+### Enhancements & fixes
+
+- [[#297](https://github.com/nf-core/viralrecon/issues/297)] - Add tube map for pipeline
+- [[#316](https://github.com/nf-core/viralrecon/issues/316)] - Variant calling isn't run when using `--skip_asciigenome` with metagenomic data
+- [[#317](https://github.com/nf-core/viralrecon/issues/317)] - `ivar_variants_to_vcf`: Ignore lines without annotation in ivar tsv file
+- [[#320](https://github.com/nf-core/viralrecon/issues/320)] - Pipeline fails at email step: Failed to invoke `workflow.onComplete` event handler
+- [[#321](https://github.com/nf-core/viralrecon/issues/321)] - `ivar_variants_to_vcf` script: Duplicated positions in tsv file due to overlapping annotations
+- [[#334](https://github.com/nf-core/viralrecon/issues/334)] - Longshot thread 'main' panicked at 'assertion failed: p <= 0.0' error
+- [[#341](https://github.com/nf-core/viralrecon/issues/341)] - `artic/minion` and `artic/guppyplex`: Update module version 1.2.2 -> 1.2.3
+- [[#348](https://github.com/nf-core/viralrecon/issues/348)] - Document full parameters of iVar consensus
+- [[#349](https://github.com/nf-core/viralrecon/issues/349)] - ERROR in Script plasmidID
+- [[#356](https://github.com/nf-core/viralrecon/issues/356)] - Add NEB SARS-CoV-2 primers
+- [[#368](https://github.com/nf-core/viralrecon/issues/368)] - Incorrect depth from ivar variants reported in variants long table
+- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2)
+- Add `tower.yml` for Report rendering in Nextflow Tower
+- Use `--skip_plasmidid` by default
+
+### Parameters
+
+| Old parameter | New parameter |
+| ------------- | ------------- |
+| `--tracedir`  |               |
+
+> **NB:** Parameter has been **updated** if both old and new parameter information is present.
+> **NB:** Parameter has been **added** if just the new parameter information is present.
+> **NB:** Parameter has been **removed** if new parameter information isn't present.
+
+### Software dependencies
+
+Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference.
+
+| Dependency  | Old version | New version |
+| ----------- | ----------- | ----------- |
+| `artic`     | 1.2.2       | 1.2.3       |
+| `bcftools`  | 1.15.1      | 1.16        |
+| `blast`     | 2.12.0      | 2.13.0      |
+| `cutadapt`  | 3.5         | 4.2         |
+| `ivar`      | 1.3.1       | 1.4         |
+| `multiqc`   | 1.13a       | 1.14        |
+| `nanoplot`  | 1.40.0      | 1.41.0      |
+| `nextclade` | 2.2.0       | 2.12.0      |
+| `pangolin`  | 4.1.1       | 4.2         |
+| `picard`    | 2.27.4      | 3.0.0       |
+| `samtools`  | 1.15.1      | 1.16.1      |
+| `spades`    | 3.15.4      | 3.15.5      |
+
+> **NB:** Dependency has been **updated** if both old and new version information is present.
+>
+> **NB:** Dependency has been **added** if just the new version information is present.
+>
+> **NB:** Dependency has been **removed** if new version information isn't present.
+ ## [[2.5](https://github.com/nf-core/viralrecon/releases/tag/2.5)] - 2022-07-13 ### Enhancements & fixes diff --git a/LICENSE b/LICENSE index 62c62721..8f37c353 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Sarai Varona and Sara Monzon +Copyright (c) Patel H, Varona S and Monzon S Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 2b880174..c50924a5 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,13 @@ [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3901628-1073c8)](https://doi.org/10.5281/zenodo.3901628) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) [![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/viralrecon) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core) +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/viralrecon)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction @@ -33,6 +31,8 @@ A number of improvements were made to the pipeline recently, mainly with regard ### Illumina +![nf-core/viralrecon Illumina metro map](docs/images/nf-core-viralrecon_metro_map_illumina.png) + 1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html)) 2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 3. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) @@ -63,6 +63,8 @@ A number of improvements were made to the pipeline recently, mainly with regard ### Nanopore +![nf-core/viralrecon Nanopore metro map](docs/images/nf-core-viralrecon_metro_map_nanopore.png) + 1. 
Sequencing QC ([`pycoQC`](https://github.com/a-slide/pycoQC)) 2. Aggregate pre-demultiplexed reads from MinKNOW/Guppy ([`artic guppyplex`](https://artic.readthedocs.io/en/latest/commands/)) 3. Read QC ([`NanoPlot`](https://github.com/wdecoster/NanoPlot)) @@ -81,13 +83,13 @@ A number of improvements were made to the pipeline recently, mainly with regard ## Quick Start -1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) +1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=22.10.1`) 2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 3. Download the pipeline and test it on a minimal dataset with a single command: - ```console + ```bash nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir ``` @@ -100,6 +102,8 @@ A number of improvements were made to the pipeline recently, mainly with regard 4. Start running your own analysis! + > - Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration except for parameters; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). 
+ - Typical command for Illumina shotgun analysis: ```bash @@ -190,7 +194,7 @@ Many thanks to others who have helped out and contributed along the way too, inc | [Kevin Menden](https://github.com/KevinMenden) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | | [Lluc Cabus](https://github.com/lcabus-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | | [Marta Pozuelo](https://github.com/mpozuelo-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | -| [Maxime Garcia](https://github.com/MaxUlysse) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | +| [Maxime Garcia](https://github.com/maxulysse) | [Seqera Labs, Spain](https://seqera.io/) | | [Michael Heuer](https://github.com/heuermh) | [UC Berkeley, USA](https://https://rise.cs.berkeley.edu) | | [Phil Ewels](https://github.com/ewels) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | | [Richard Mitter](https://github.com/rjmitter) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | diff --git a/assets/adaptivecard.json b/assets/adaptivecard.json new file mode 100644 index 00000000..f6924843 --- /dev/null +++ b/assets/adaptivecard.json @@ -0,0 +1,67 @@ +{ + "type": "message", + "attachments": [ + { + "contentType": "application/vnd.microsoft.card.adaptive", + "contentUrl": null, + "content": { + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "msteams": { + "width": "Full" + }, + "type": "AdaptiveCard", + "version": "1.2", + "body": [ + { + "type": "TextBlock", + "size": "Large", + "weight": "Bolder", + "color": "<% if (success) { %>Good<% } else { %>Attention<%} %>", + "text": "nf-core/viralrecon v${version} - ${runName}", + "wrap": true + }, + { + "type": "TextBlock", + "spacing": "None", + "text": "Completed at ${dateComplete} (duration: ${duration})", + "isSubtle": true, + "wrap": true + }, + { + "type": "TextBlock", + "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors. The full error message was: ${errorReport}.<% } %>", + "wrap": true + }, + { + "type": "TextBlock", + "text": "The command used to launch the workflow was as follows:", + "wrap": true + }, + { + "type": "TextBlock", + "text": "${commandLine}", + "isSubtle": true, + "wrap": true + } + ], + "actions": [ + { + "type": "Action.ShowCard", + "title": "Pipeline Configuration", + "card": { + "type": "AdaptiveCard", + "\$schema": "http://adaptivecards.io/schemas/adaptive-card.json", + "body": [ + { + "type": "FactSet", + "facts": [<% out << summary.collect{ k,v -> "{\"title\": \"$k\", \"value\" : \"$v\"}"}.join(",\n") %> + ] + } + ] + } + } + ] + } + } + ] +} diff --git a/assets/email_template.html b/assets/email_template.html index 1bd952cd..fbf28653 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -50,8 +50,7 @@

[assets/email_template.html hunk bodies garbled in extraction (HTML tags stripped). Recoverable content: the first hunk (@@ -50,8 +50,7 @@) reflows the `<li>` list of samples that failed the minimum mapped read threshold (< ${min_mapped_reads}), built from `${fail_mapped_reads.sort().join(...)}`, onto a single line; the second hunk (@@ -99,32 +98,7 @@) collapses the multi-line "Pipeline Configuration" summary-table row template (`<% out << summary.collect{ k,v -> ...$k...$v... }.join("\n") %>`) onto a single line.]
" }.join("\n") %> diff --git a/assets/email_template.txt b/assets/email_template.txt index 9b78bb18..9cf77902 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/viralrecon v${version} ---------------------------------------------------- - Run Name: $runName <% if (!success){ diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml new file mode 100644 index 00000000..056f5bfb --- /dev/null +++ b/assets/methods_description_template.yml @@ -0,0 +1,25 @@ +id: "nf-core-viralrecon-methods-description" +description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication." +section_name: "nf-core/viralrecon Methods Description" +section_href: "https://github.com/nf-core/viralrecon" +plot_type: "html" +## TODO nf-core: Update the HTML below to your prefered methods description, e.g. add publication citation for this pipeline +## You inject any metadata in the Nextflow '${workflow}' object +data: | +

+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/viralrecon v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
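The new `assets/methods_description_template.yml` above is consumed by MultiQC as a custom-content section: its `data:` block is HTML in which `${...}` placeholders are filled from Nextflow's `workflow` metadata before the section is embedded in the MultiQC report (the pipeline performs this substitution in its Groovy `lib/` helpers with a `SimpleTemplateEngine`). For a quick local preview of the rendered HTML, here is a minimal Python sketch of that substitution step — the PyYAML dependency, the regex-based replacement, and all stand-in values are assumptions for illustration, not the pipeline's actual Groovy rendering:

```python
import re

import yaml  # PyYAML, assumed available (pip install pyyaml)

# Hypothetical stand-in values for the Groovy template variables.
VALUES = {
    "workflow.manifest.version": "2.6.0",
    "workflow.nextflow.version": "22.10.1",
    "workflow.commandLine": "nextflow run nf-core/viralrecon -profile test,docker --outdir results",
    "doi_text": "(doi: 10.5281/zenodo.3901628)",
    "nodoi_text": "",
}

# Load the MultiQC custom-content YAML and pull out the HTML block.
with open("assets/methods_description_template.yml") as fh:
    html = yaml.safe_load(fh)["data"]

# Substitute ${...} placeholders; any unknown placeholder is left untouched.
preview = re.sub(r"\$\{([^}]+)\}", lambda m: VALUES.get(m.group(1), m.group(0)), html)
print(preview)
```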
diff --git a/assets/multiqc_config_illumina.yml b/assets/multiqc_config_illumina.yml
index 1979a953..2f7b84c2 100644
--- a/assets/multiqc_config_illumina.yml
+++ b/assets/multiqc_config_illumina.yml
@@ -101,6 +101,10 @@ module_order:
       - "./assembly_minia/*.tsv"
 
 report_section_order:
+  fail_mapped_reads:
+    after: summary_variants_metrics
+  fail_mapped_samples:
+    after: summary_variants_metrics
   summary_assembly_metrics:
     before: summary_variants_metrics
   amplicon_heatmap:
diff --git a/assets/multiqc_config_nanopore.yml b/assets/multiqc_config_nanopore.yml
index 5468d7fd..f4851ee2 100644
--- a/assets/multiqc_config_nanopore.yml
+++ b/assets/multiqc_config_nanopore.yml
@@ -37,6 +37,14 @@ module_order:
       - "./quast/*.tsv"
 
 report_section_order:
+  fail_barcodes_no_sample:
+    after: summary_variants_metrics
+  fail_no_barcode_samples:
+    after: summary_variants_metrics
+  fail_barcode_count_samples:
+    after: summary_variants_metrics
+  fail_guppyplex_count_samples:
+    after: summary_variants_metrics
   amplicon_heatmap:
     before: summary_variants_metrics
   software_versions:
diff --git a/assets/slackreport.json b/assets/slackreport.json
new file mode 100644
index 00000000..043d02f2
--- /dev/null
+++ b/assets/slackreport.json
@@ -0,0 +1,34 @@
+{
+    "attachments": [
+        {
+            "fallback": "Plain-text summary of the attachment.",
+            "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
+            "author_name": "nf-core/viralrecon v${version} - ${runName}",
+            "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
+            "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
+            "fields": [
+                {
+                    "title": "Command used to launch the workflow",
+                    "value": "```${commandLine}```",
+                    "short": false
+                }
+                <%
+                if (!success) { %>
+                ,
+                {
+                    "title": "Full error message",
+                    "value": "```${errorReport}```",
+                    "short": false
+                },
+                {
+                    "title": "Pipeline configuration",
+                    "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ?
("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>", + "short": false + } + <% } + %> + ], + "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})" + } + ] +} diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index 261ddb95..f866fd86 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -58,17 +58,12 @@ def check_illumina_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: - ## Check header MIN_COLS = 2 HEADER = ["sample", "fastq_1", "fastq_2"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print( - "ERROR: Please check samplesheet header -> {} != {}".format( - ",".join(header), ",".join(HEADER) - ) - ) + print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) sys.exit(1) ## Check sample entries @@ -85,9 +80,7 @@ def check_illumina_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format( - MIN_COLS - ), + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line, ) @@ -95,9 +88,7 @@ def check_illumina_samplesheet(file_in, file_out): ## Check sample name entries sample, fastq_1, fastq_2 = lspl[: len(HEADER)] if sample.find(" ") != -1: - print( - f"WARNING: Spaces have been replaced by underscores for sample: {sample}" - ) + print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") sample = sample.replace(" ", "_") if not sample: print_error("Sample entry has not been specified!", "Line", line) @@ -139,21 +130,15 @@ def check_illumina_samplesheet(file_in, file_out): with open(file_out, "w") as fout: fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n") for sample in sorted(sample_mapping_dict.keys()): - ## Check that multiple runs of the same sample are of the same datatype - if not all( - x[0] == sample_mapping_dict[sample][0][0] - for x in sample_mapping_dict[sample] - ): + if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): print_error( "Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample), ) for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write( - ",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n" - ) + fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n") else: print_error("No entries to process!", "Samplesheet: {}".format(file_in)) @@ -173,17 +158,12 @@ def check_nanopore_samplesheet(file_in, file_out): sample_mapping_dict = {} with open(file_in, "r") as fin: - ## Check header MIN_COLS = 2 HEADER = ["sample", "barcode"] header = [x.strip('"') for x in fin.readline().strip().split(",")] if header[: len(HEADER)] != HEADER: - print( - "ERROR: Please check samplesheet header -> {} != {}".format( - ",".join(header), ",".join(HEADER) - ) - ) + print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) sys.exit(1) ## Check sample entries @@ -200,9 +180,7 @@ def check_nanopore_samplesheet(file_in, file_out): num_cols = len([x for x in lspl if x]) if num_cols < MIN_COLS: print_error( - "Invalid number of populated columns (minimum = {})!".format( - MIN_COLS - ), + 
"Invalid number of populated columns (minimum = {})!".format(MIN_COLS), "Line", line, ) @@ -210,14 +188,10 @@ def check_nanopore_samplesheet(file_in, file_out): ## Check sample entry sample, barcode = lspl[: len(HEADER)] if sample.find(" ") != -1: - print( - f"WARNING: Spaces have been replaced by underscores for sample: {sample}" - ) + print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") sample = sample.replace(" ", "_") if sample.find("-") != -1: - print( - f"WARNING: Dashes have been replaced by underscores for sample: {sample}" - ) + print(f"WARNING: Dashes have been replaced by underscores for sample: {sample}") sample = sample.replace("-", "_") if not sample: print_error("Sample entry has not been specified!", "Line", line) @@ -274,4 +248,4 @@ def main(args=None): if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/bin/collapse_primer_bed.py b/bin/collapse_primer_bed.py index 7a439f36..d04d7744 100755 --- a/bin/collapse_primer_bed.py +++ b/bin/collapse_primer_bed.py @@ -56,9 +56,7 @@ def collapse_primer_bed(file_in, file_out, left_primer_suffix, right_primer_suff line = fin.readline() if line: chrom, start, end, name, score, strand = line.strip().split("\t") - primer = re.sub( - r"(?:{}|{}).*".format(left_primer_suffix, right_primer_suffix), "", name - ) + primer = re.sub(r"(?:{}|{}).*".format(left_primer_suffix, right_primer_suffix), "", name) if primer not in interval_dict: interval_dict[primer] = [] interval_dict[primer].append((chrom, int(start), int(end), score)) @@ -81,9 +79,7 @@ def collapse_primer_bed(file_in, file_out, left_primer_suffix, right_primer_suff def main(args=None): args = parse_args(args) - collapse_primer_bed( - args.FILE_IN, args.FILE_OUT, args.LEFT_PRIMER_SUFFIX, args.RIGHT_PRIMER_SUFFIX - ) + collapse_primer_bed(args.FILE_IN, args.FILE_OUT, args.LEFT_PRIMER_SUFFIX, args.RIGHT_PRIMER_SUFFIX) if __name__ == "__main__": diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py index f56d5a00..b2e08eed 100755 --- a/bin/fastq_dir_to_samplesheet.py +++ b/bin/fastq_dir_to_samplesheet.py @@ -7,9 +7,7 @@ def parse_args(args=None): - Description = ( - "Generate nf-core/viralrecon samplesheet from a directory of FastQ files." - ) + Description = "Generate nf-core/viralrecon samplesheet from a directory of FastQ files." Epilog = "Example usage: python fastq_dir_to_samplesheet.py " parser = argparse.ArgumentParser(description=Description, epilog=Epilog) @@ -79,9 +77,7 @@ def sanitize_sample(path, extension): sample = os.path.basename(path).replace(extension, "") if sanitise_name: sample = sanitise_name_delimiter.join( - os.path.basename(path).split(sanitise_name_delimiter)[ - :sanitise_name_index - ] + os.path.basename(path).split(sanitise_name_delimiter)[:sanitise_name_index] ) return sample @@ -92,9 +88,7 @@ def get_fastqs(extension): sorted results. 
See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered """ - return sorted( - glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False) - ) + return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False)) read_dict = {} @@ -128,9 +122,7 @@ def get_fastqs(extension): sample_info = ",".join([sample, read_1, read_2]) fout.write(f"{sample_info}\n") else: - error_str = ( - "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" - ) + error_str = "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" error_str += "Please check the values provided for the:\n" error_str += " - Path to the directory containing the FastQ files\n" error_str += " - '--read1_extension' parameter\n" diff --git a/bin/fetch_sra_runinfo.py b/bin/fetch_sra_runinfo.py deleted file mode 100755 index 378c1745..00000000 --- a/bin/fetch_sra_runinfo.py +++ /dev/null @@ -1,259 +0,0 @@ -#!/usr/bin/env python - -import os -import re -import sys -import csv -import errno -import requests -import argparse - - -## Example ids supported by this script -SRA_IDS = [ - "PRJNA63463", - "SAMN00765663", - "SRA023522", - "SRP003255", - "SRR390278", - "SRS282569", - "SRX111814", -] -ENA_IDS = [ - "ERA2421642", - "ERP120836", - "ERR674736", - "ERS4399631", - "ERX629702", - "PRJEB7743", - "SAMEA3121481", -] -GEO_IDS = ["GSE18729", "GSM465244"] -ID_REGEX = r"^[A-Z]+" -PREFIX_LIST = sorted( - list(set([re.search(ID_REGEX, x).group() for x in SRA_IDS + ENA_IDS + GEO_IDS])) -) - - -def parse_args(args=None): - Description = "Download and create a run information metadata file from SRA/ENA/GEO identifiers." - Epilog = """Example usage: python fetch_sra_runinfo.py """ - - parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument( - "FILE_IN", help="File containing database identifiers, one per line." - ) - parser.add_argument("FILE_OUT", help="Output file in tab-delimited format.") - parser.add_argument( - "-pl", - "--platform", - type=str, - dest="PLATFORM", - default="", - help="Comma-separated list of platforms to use for filtering. Accepted values = 'ILLUMINA', 'OXFORD_NANOPORE' (default: '').", - ) - parser.add_argument( - "-ll", - "--library_layout", - type=str, - dest="LIBRARY_LAYOUT", - default="", - help="Comma-separated list of library layouts to use for filtering. 
Accepted values = 'SINGLE', 'PAIRED' (default: '').", - ) - return parser.parse_args(args) - - -def validate_csv_param(param, validVals, param_desc): - validList = [] - if param: - userVals = param.split(",") - intersect = list(set(userVals) & set(validVals)) - if len(intersect) == len(userVals): - validList = intersect - else: - print( - "ERROR: Please provide a valid {} parameter!\nProvided values = {}\nAccepted values = {}".format( - param_desc, param, ",".join(validVals) - ) - ) - sys.exit(1) - return validList - - -def make_dir(path): - if not len(path) == 0: - try: - os.makedirs(path) - except OSError as exception: - if exception.errno != errno.EEXIST: - raise - - -def fetch_url(url, encoding="utf-8"): - try: - r = requests.get(url) - except requests.exceptions.RequestException as e: - raise SystemExit(e) - if r.status_code != 200: - print("ERROR: Connection failed\nError code '{}'".format(r.status_code)) - sys.exit(1) - return r.content.decode(encoding).splitlines() - - -def id_to_srx(db_id): - ids = [] - url = "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term={}".format( - db_id - ) - for row in csv.DictReader(fetch_url(url), delimiter=","): - ids.append(row["Experiment"]) - return ids - - -def id_to_erx(db_id): - ids = [] - fields = ["run_accession", "experiment_accession"] - url = "http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}".format( - db_id, ",".join(fields) - ) - for row in csv.DictReader(fetch_url(url), delimiter="\t"): - ids.append(row["experiment_accession"]) - return ids - - -def gse_to_srx(db_id): - ids = [] - url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}&targ=gsm&view=data&form=text".format( - db_id - ) - gsm_ids = [x.split("=")[1].strip() for x in fetch_url(url) if x.find("GSM") != -1] - for gsm_id in gsm_ids: - ids += id_to_srx(gsm_id) - return ids - - -def get_ena_fields(): - fields = [] - url = "https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run" - for row in csv.DictReader(fetch_url(url), delimiter="\t"): - fields.append(row["columnId"]) - return fields - - -def fetch_sra_runinfo(FileIn, FileOut, platformList=[], libraryLayoutList=[]): - total_out = 0 - seen_ids = [] - run_ids = [] - header = [] - make_dir(os.path.dirname(FileOut)) - ena_fields = get_ena_fields() - fin = open(FileIn, "r") - fout = open(FileOut, "w") - while True: - line = fin.readline() - if line: - db_id = line.strip() - match = re.search(ID_REGEX, db_id) - if match: - prefix = match.group() - if prefix in PREFIX_LIST: - if not db_id in seen_ids: - - ids = [db_id] - ## Resolve/expand these ids against GEO URL - if prefix in ["GSE"]: - ids = gse_to_srx(db_id) - - ## Resolve/expand these ids against SRA URL - elif prefix in ["GSM", "PRJNA", "SAMN", "SRR"]: - ids = id_to_srx(db_id) - - ## Resolve/expand these ids against ENA URL - elif prefix in ["ERR"]: - ids = id_to_erx(db_id) - - ## Resolve/expand to get run identifier from ENA and write to file - for id in ids: - url = "http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}".format( - id, ",".join(ena_fields) - ) - csv_dict = csv.DictReader(fetch_url(url), delimiter="\t") - for row in csv_dict: - run_id = row["run_accession"] - if not run_id in run_ids: - - writeID = True - if platformList: - if ( - row["instrument_platform"] - not in platformList - ): - writeID = False - if libraryLayoutList: - if ( - row["library_layout"] - not in libraryLayoutList - ): - writeID = False 
- - if writeID: - if total_out == 0: - header = sorted(row.keys()) - fout.write( - "{}\n".format("\t".join(sorted(header))) - ) - else: - if header != sorted(row.keys()): - print( - "ERROR: Metadata columns do not match for id {}!\nLine: '{}'".format( - run_id, line.strip() - ) - ) - sys.exit(1) - fout.write( - "{}\n".format( - "\t".join([row[x] for x in header]) - ) - ) - total_out += 1 - run_ids.append(run_id) - seen_ids.append(db_id) - else: - id_str = ", ".join([x + "*" for x in PREFIX_LIST]) - print( - "ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format( - id_str, line.strip() - ) - ) - sys.exit(1) - else: - id_str = ", ".join([x + "*" for x in PREFIX_LIST]) - print( - "ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format( - id_str, line.strip() - ) - ) - sys.exit(1) - else: - break - fin.close() - fout.close() - - -def main(args=None): - args = parse_args(args) - platformList = validate_csv_param( - args.PLATFORM, - validVals=["ILLUMINA", "OXFORD_NANOPORE"], - param_desc="--platform", - ) - libraryLayoutList = validate_csv_param( - args.LIBRARY_LAYOUT, - validVals=["SINGLE", "PAIRED"], - param_desc="--library_layout", - ) - fetch_sra_runinfo(args.FILE_IN, args.FILE_OUT, platformList, libraryLayoutList) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index a900f502..d00b3988 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from email.charset import QP import os import sys import re @@ -82,8 +83,8 @@ def parse_ivar_line(line): return: CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type """ - line = re.split("\t", line) + line = line.strip("\n").split("\t") ## Assign intial fields to variables CHROM = line[0] POS = line[1] @@ -92,7 +93,12 @@ def parse_ivar_line(line): ALT = line[3] ## REF/ALF depths and quals - REF_DP = int(line[4]) + try: + REF_DP = int(line[4]) + except ValueError: + print(line) + print(line[4]) + exit(-1) REF_RV = int(line[5]) REF_FW = REF_DP - REF_RV REF_QUAL = int(line[6]) @@ -120,7 +126,7 @@ def parse_ivar_line(line): QUAL = "." ## Determine FILTER field - INFO = f"DP={line[11]}" + INFO = f"DP={int(float(line[11]))}" pass_test = line[13] return ( @@ -172,9 +178,7 @@ def strand_bias_filter(format): # table: ## REF_FW REF_RV ## ALT_FW ALT_RV - table = np.array( - [[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]] - ) + table = np.array([[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]]) oddsr, pvalue = fisher_exact(table, alternative="greater") # h0: both strands are equally represented. @@ -202,9 +206,7 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): if ref: header_contig = [] for record in SeqIO.parse(ref, "fasta"): - header_contig += [ - "##contig=" - ] + header_contig += ["##contig="] header_source += header_contig @@ -225,9 +227,7 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): ] header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}"] if not ignore_strand_bias: - header_filter += [ - '##FILTER=' - ] + header_filter += ['##FILTER='] header = header_source + header_info + header_filter + header_format + header_cols fout = open(file_out, "w") @@ -289,7 +289,8 @@ def check_consecutive(mylist): return: Number of items consecutive in the list - [False, 2, 3,..] 
""" - my_list = list(map(int, mylist)) + # getting first index of tuple for consecutive checking + my_list = list(map(int, [i[0] for i in mylist])) ## Check if the list contains consecutive numbers if len(my_list) == 1: return False @@ -316,8 +317,9 @@ def get_diff_position(seq1, seq2): Returns: Returns position where seq1 != seq2 """ + # If codon is NA treat as not same codon if seq1 == "NA": - return False + return 2 ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]] if len(ind_diff) > 1: @@ -380,9 +382,7 @@ def process_variants(variants, num_collapse): """ # Collapsed variant parameters equal to first variant key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] - chrom, pos, id, qual, filter, info, format = [ - variants[next(iter(variants))][key] for key in key_list - ] + chrom, pos, id, qual, filter, info, format = [variants[next(iter(variants))][key] for key in key_list] # If no consecutive, process one variant line # If two consecutive, process two variant lines into one @@ -390,7 +390,7 @@ def process_variants(variants, num_collapse): ref = "" alt = "" iter_variants = iter(variants) - for i in range(num_collapse): + for _ in range(num_collapse): # fixed notation var = next(iter_variants) ref += variants[var]["ref"] alt += variants[var]["alt"] @@ -409,6 +409,7 @@ def main(args=None): var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} # variant counts variants = OrderedDict() # variant dict (merge codon) q_pos = deque([], maxlen=3) # pos fifo queue (merge codon) + last_pos = "" # Create output directory make_dir(out_dir) @@ -423,8 +424,7 @@ def main(args=None): ################################# with open(args.file_in, "r") as fin: for line in fin: - if not re.match("REGION", line): - + if "REGION" not in line: ################ ## Parse line ## ################ @@ -445,6 +445,12 @@ def main(args=None): pass_test, var_type, ) = parse_ivar_line(line) + + ## If pos is duplicated due to annotation skip lines + if pos == last_pos: + continue + + last_pos = pos ##################### ## Process filters ## ##################### @@ -469,10 +475,7 @@ def main(args=None): if args.pass_only and filter != "PASS": write_line = False ### AF filtering. 
ALT_DP/(ALT_DP+REF_DP) - if ( - float(format[3] / (format[0] + format[3])) - < args.allele_freq_threshold - ): + if float(format[3] / (format[0] + format[3])) < args.allele_freq_threshold: write_line = False ### Duplication filter if (chrom, pos, ref, alt) in var_list: @@ -486,7 +489,7 @@ def main(args=None): ############################################################ if not args.ignore_merge_codons and var_type == "SNP": ## re-fill queue and dict accordingly - q_pos.append(pos) + q_pos.append((pos, var_type)) # adding type information variants[(chrom, pos, ref, alt)] = { "chrom": chrom, "pos": pos, @@ -504,9 +507,7 @@ def main(args=None): if len(q_pos) == q_pos.maxlen: fe_codon_ref = variants[next(iter(variants))]["ref_codon"] fe_codon_alt = variants[next(iter(variants))]["alt_codon"] - num_collapse = check_merge_codons( - q_pos, fe_codon_ref, fe_codon_alt - ) + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) ( chrom, pos, @@ -520,7 +521,7 @@ def main(args=None): ) = process_variants(variants, num_collapse) ## Empty variants dict and queue accordingly - for i in range(num_collapse): + for _ in range(num_collapse): variants.popitem(last=False) q_pos.popleft() else: @@ -549,28 +550,44 @@ def main(args=None): ## handle last lines ## ####################### while len(q_pos) > 0: - fe_codon_ref = variants[next(iter(variants))]["ref_codon"] - fe_codon_alt = variants[next(iter(variants))]["alt_codon"] - num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) - (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants( - variants, num_collapse - ) - - var_count_dict[var_type] += 1 - write_vcf_line( - chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out - ) - ## Empty variants dict and queue accordingly - for i in range(num_collapse): - variants.popitem(last=False) - q_pos.popleft() + try: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] + except StopIteration: + break + else: + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants(variants, num_collapse) + + var_count_dict[q_pos[0][1]] += 1 + write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out) + ## Empty variants dict and queue accordingly + for _ in range(num_collapse): + variants.popitem(last=False) + q_pos.popleft() ############################################# ## variant counts to pass to MultiQC ## ############################################# var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] - print("\t".join(["sample"] + [x[0] for x in var_count_list])) - print("\t".join([filename] + [x[1] for x in var_count_list])) + + # format output table a little more cleanly + # row_spacing = len(filename) + + row = create_f_string(30, "<") # an arbitraily long value to fit most sample names + row += create_f_string(10) * len(var_count_list) # A spacing of ten looks pretty + + headers = ["sample"] + headers.extend([x[0] for x in var_count_list]) + data = [filename] + data.extend([x[1] for x in var_count_list]) + print(row.format(*headers)) + print(row.format(*data)) + + +def create_f_string(str_size, placement="^"): + row_size = "{: " + placement + str(str_size) + "}" + return row_size if __name__ == "__main__": diff --git a/bin/make_bed_mask.py b/bin/make_bed_mask.py index 46e06bed..29b07a27 100755 --- a/bin/make_bed_mask.py +++ b/bin/make_bed_mask.py @@ -49,10 +49,10 
@@ def make_bed_mask(bed_in, bed_out, indels_pos_len): for position in indels_positions: indel_init_pos = position indel_whole_length = indels_pos_len[position] - indel_end_pos = int(indel_init_pos) + int(indel_whole_length)-1 - if int(init_pos) in range( + indel_end_pos = int(indel_init_pos) + int(indel_whole_length) - 1 + if int(init_pos) in range(int(indel_init_pos), int(indel_end_pos)) or int(end_pos) in range( int(indel_init_pos), int(indel_end_pos) - ) or int(end_pos) in range(int(indel_init_pos), int(indel_end_pos)): + ): test = False break else: diff --git a/bin/make_variants_long_table.py b/bin/make_variants_long_table.py index a19c495f..f0bad221 100755 --- a/bin/make_variants_long_table.py +++ b/bin/make_variants_long_table.py @@ -14,22 +14,66 @@ logger = logging.getLogger() -pd.set_option('display.max_columns', None) -pd.set_option('display.max_rows', None) +pd.set_option("display.max_columns", None) +pd.set_option("display.max_rows", None) def parser_args(args=None): - Description = 'Create long/wide tables containing variant information.' + Description = "Create long/wide tables containing variant information." Epilog = """Example usage: python make_variants_long_table.py --bcftools_query_dir ./bcftools_query/ --snpsift_dir ./snpsift/ --pangolin_dir ./pangolin/""" parser = argparse.ArgumentParser(description=Description, epilog=Epilog) - parser.add_argument("-bd", "--bcftools_query_dir" , type=str, default="./bcftools_query" , help="Directory containing output of BCFTools query for each sample (default: './bcftools_query').") - parser.add_argument("-sd", "--snpsift_dir" , type=str, default="./snpsift" , help="Directory containing output of SnpSift for each sample (default: './snpsift').") - parser.add_argument("-pd", "--pangolin_dir" , type=str, default="./pangolin" , help="Directory containing output of Pangolin for each sample (default: './pangolin').") - parser.add_argument("-bs", "--bcftools_file_suffix", type=str, default=".bcftools_query.txt" , help="Suffix to trim off BCFTools query file name to obtain sample name (default: '.bcftools_query.txt').") - parser.add_argument("-ss", "--snpsift_file_suffix" , type=str, default=".snpsift.txt" , help="Suffix to trim off SnpSift file name to obtain sample name (default: '.snpsift.txt').") - parser.add_argument("-ps", "--pangolin_file_suffix", type=str, default=".pangolin.csv" , help="Suffix to trim off Pangolin file name to obtain sample name (default: '.pangolin.csv').") - parser.add_argument("-of", "--output_file" , type=str, default="variants_long_table.csv", help="Full path to output file (default: 'variants_long_table.csv').") - parser.add_argument("-vc", "--variant_caller" , type=str, default="ivar" , help="Tool used to call the variants (default: 'ivar').") + parser.add_argument( + "-bd", + "--bcftools_query_dir", + type=str, + default="./bcftools_query", + help="Directory containing output of BCFTools query for each sample (default: './bcftools_query').", + ) + parser.add_argument( + "-sd", + "--snpsift_dir", + type=str, + default="./snpsift", + help="Directory containing output of SnpSift for each sample (default: './snpsift').", + ) + parser.add_argument( + "-pd", + "--pangolin_dir", + type=str, + default="./pangolin", + help="Directory containing output of Pangolin for each sample (default: './pangolin').", + ) + parser.add_argument( + "-bs", + "--bcftools_file_suffix", + type=str, + default=".bcftools_query.txt", + help="Suffix to trim off BCFTools query file name to obtain sample name (default: 
'.bcftools_query.txt').", + ) + parser.add_argument( + "-ss", + "--snpsift_file_suffix", + type=str, + default=".snpsift.txt", + help="Suffix to trim off SnpSift file name to obtain sample name (default: '.snpsift.txt').", + ) + parser.add_argument( + "-ps", + "--pangolin_file_suffix", + type=str, + default=".pangolin.csv", + help="Suffix to trim off Pangolin file name to obtain sample name (default: '.pangolin.csv').", + ) + parser.add_argument( + "-of", + "--output_file", + type=str, + default="variants_long_table.csv", + help="Full path to output file (default: 'variants_long_table.csv').", + ) + parser.add_argument( + "-vc", "--variant_caller", type=str, default="ivar", help="Tool used to call the variants (default: 'ivar')." + ) return parser.parse_args(args) @@ -43,113 +87,138 @@ def make_dir(path): def get_file_dict(file_dir, file_suffix): - files = glob.glob(os.path.join(file_dir, f'*{file_suffix}')) - samples = [os.path.basename(x).removesuffix(f'{file_suffix}') for x in files] + files = glob.glob(os.path.join(file_dir, f"*{file_suffix}")) + samples = [os.path.basename(x).removesuffix(f"{file_suffix}") for x in files] return dict(zip(samples, files)) def three_letter_aa_to_one(hgvs_three): - aa_dict= { - 'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D', 'Cys': 'C', - 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G', 'His': 'H', 'Ile': 'I', - 'Leu': 'L', 'Lys': 'K', 'Met': 'M', 'Phe': 'F', 'Pro': 'P', - 'Pyl': 'O', 'Ser': 'S', 'Sec': 'U', 'Thr': 'T', 'Trp': 'W', - 'Tyr': 'Y', 'Val': 'V', 'Asx': 'B', 'Glx': 'Z', 'Xaa': 'X', - 'Xle': 'J', 'Ter': '*' + aa_dict = { + "Ala": "A", + "Arg": "R", + "Asn": "N", + "Asp": "D", + "Cys": "C", + "Gln": "Q", + "Glu": "E", + "Gly": "G", + "His": "H", + "Ile": "I", + "Leu": "L", + "Lys": "K", + "Met": "M", + "Phe": "F", + "Pro": "P", + "Pyl": "O", + "Ser": "S", + "Sec": "U", + "Thr": "T", + "Trp": "W", + "Tyr": "Y", + "Val": "V", + "Asx": "B", + "Glx": "Z", + "Xaa": "X", + "Xle": "J", + "Ter": "*", } hgvs_one = hgvs_three for key in aa_dict: if key in hgvs_one: - hgvs_one = hgvs_one.replace(str(key),str(aa_dict[key])) + hgvs_one = hgvs_one.replace(str(key), str(aa_dict[key])) return hgvs_one ## Returns a pandas dataframe in the format: - # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF - # 0 MN908947.3 241 C T PASS 642 375 266 0.41 - # 1 MN908947.3 1875 C T PASS 99 63 34 0.34 +# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF +# 0 MN908947.3 241 C T PASS 642 375 266 0.41 +# 1 MN908947.3 1875 C T PASS 99 63 34 0.34 def ivar_bcftools_query_to_table(bcftools_query_file): - table = pd.read_table(bcftools_query_file, header='infer') - table = table.dropna(how='all', axis=1) + table = pd.read_table(bcftools_query_file, header="infer") + table = table.dropna(how="all", axis=1) old_colnames = list(table.columns) - new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames] + new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames] table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True) if not table.empty: table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric) - table['AF'] = table['ALT_DP'] / table['DP'] - table['AF'] = table['AF'].round(2) + table["AF"] = table["ALT_DP"] / table["DP"] + table["AF"] = table["AF"].round(2) return table ## Returns a pandas dataframe in the format: - # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF - # 0 MN908947.3 241 C T . 24 8 16 0.67 - # 1 MN908947.3 3037 C T . 17 5 12 0.71 +# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF +# 0 MN908947.3 241 C T . 24 8 16 0.67 +# 1 MN908947.3 3037 C T . 
17 5 12 0.71 def bcftools_bcftools_query_to_table(bcftools_query_file): - table = pd.read_table(bcftools_query_file, header='infer') - table = table.dropna(how='all', axis=1) + table = pd.read_table(bcftools_query_file, header="infer") + table = table.dropna(how="all", axis=1) old_colnames = list(table.columns) - new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames] + new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames] table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True) if not table.empty: - table[['REF_DP','ALT_DP']] = table['AD'].str.split(',', expand=True) + table[["REF_DP", "ALT_DP"]] = table["AD"].str.split(",", expand=True) table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric) - table['AF'] = table['ALT_DP'] / table['DP'] - table['AF'] = table['AF'].round(2) - table.drop('AD', axis=1, inplace=True) + table["AF"] = table["ALT_DP"] / table["DP"] + table["AF"] = table["AF"].round(2) + table.drop("AD", axis=1, inplace=True) return table ## Returns a pandas dataframe in the format: - # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF - # 0 MN908947.3 241 C T PASS 30 1 29 0.97 - # 1 MN908947.3 1163 A T PASS 28 0 28 1.00 +# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF +# 0 MN908947.3 241 C T PASS 30 1 29 0.97 +# 1 MN908947.3 1163 A T PASS 28 0 28 1.00 def nanopolish_bcftools_query_to_table(bcftools_query_file): - table = pd.read_table(bcftools_query_file, header='infer') - table = table.dropna(how='all', axis=1) + table = pd.read_table(bcftools_query_file, header="infer") + table = table.dropna(how="all", axis=1) old_colnames = list(table.columns) - new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames] + new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames] table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True) ## Split out ref/alt depths from StrandSupport column if not table.empty: table_cp = table.copy() - table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']] = table_cp['StrandSupport'].str.split(',', expand=True) - table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']] = table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']].apply(pd.to_numeric) - - table['DP'] = table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']].sum(axis=1) - table['REF_DP'] = table_cp[['FORW_REF_DP','REV_REF_DP']].sum(axis=1) - table['ALT_DP'] = table_cp[['FORW_ALT_DP','REV_ALT_DP']].sum(axis=1) - table['AF'] = table['ALT_DP'] / table['DP'] - table['AF'] = table['AF'].round(2) - table.drop('StrandSupport', axis=1, inplace=True) + table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]] = table_cp["StrandSupport"].str.split( + ",", expand=True + ) + table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]] = table_cp[ + ["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"] + ].apply(pd.to_numeric) + + table["DP"] = table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]].sum(axis=1) + table["REF_DP"] = table_cp[["FORW_REF_DP", "REV_REF_DP"]].sum(axis=1) + table["ALT_DP"] = table_cp[["FORW_ALT_DP", "REV_ALT_DP"]].sum(axis=1) + table["AF"] = table["ALT_DP"] / table["DP"] + table["AF"] = table["AF"].round(2) + table.drop("StrandSupport", axis=1, inplace=True) return table ## Returns a pandas dataframe in the format: - # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF - # 0 MN908947.3 241 C T PASS 21 0 21 1.00 - # 1 MN908947.3 3037 C T PASS 28 0 25 0.89 +# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF +# 0 
MN908947.3 241 C T PASS 21 0 21 1.00 +# 1 MN908947.3 3037 C T PASS 28 0 25 0.89 def medaka_bcftools_query_to_table(bcftools_query_file): - table = pd.read_table(bcftools_query_file, header='infer') - table = table.dropna(how='all', axis=1) + table = pd.read_table(bcftools_query_file, header="infer") + table = table.dropna(how="all", axis=1) old_colnames = list(table.columns) - new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames] + new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames] table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True) if not table.empty: - table[['REF_DP','ALT_DP']] = table['AC'].str.split(',', expand=True) + table[["REF_DP", "ALT_DP"]] = table["AC"].str.split(",", expand=True) table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric) - table['AF'] = table['ALT_DP'] / table['DP'] - table['AF'] = table['AF'].round(2) - table.drop('AC', axis=1, inplace=True) + table["AF"] = table["ALT_DP"] / table["DP"] + table["AF"] = table["AF"].round(2) + table.drop("AC", axis=1, inplace=True) return table @@ -157,25 +226,25 @@ def medaka_bcftools_query_to_table(bcftools_query_file): def get_pangolin_lineage(pangolin_file): table = pd.read_csv(pangolin_file, sep=",", header="infer") - return table['lineage'][0] + return table["lineage"][0] def snpsift_to_table(snpsift_file): - table = pd.read_table(snpsift_file, sep="\t", header='infer') - table = table.loc[:, ~table.columns.str.contains('^Unnamed')] + table = pd.read_table(snpsift_file, sep="\t", header="infer") + table = table.loc[:, ~table.columns.str.contains("^Unnamed")] old_colnames = list(table.columns) - new_colnames = [x.replace('ANN[*].', '') for x in old_colnames] + new_colnames = [x.replace("ANN[*].", "") for x in old_colnames] table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True) - table = table.loc[:, ['CHROM', 'POS', 'REF', 'ALT', 'GENE', 'EFFECT', 'HGVS_C', 'HGVS_P']] + table = table.loc[:, ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]] ## Split by comma and get first value in cols = ['ALT','GENE','EFFECT','HGVS_C','HGVS_P'] for i in range(len(table)): - for j in range(3,8): - table.iloc[i,j] = str(table.iloc[i,j]).split(",")[0] + for j in range(3, 8): + table.iloc[i, j] = str(table.iloc[i, j]).split(",")[0] ## Amino acid substitution aa = [] - for index,item in table["HGVS_P"].iteritems(): + for index, item in table["HGVS_P"].iteritems(): hgvs_p = three_letter_aa_to_one(str(item)) aa.append(hgvs_p) table["HGVS_P_1LETTER"] = pd.Series(aa) @@ -191,9 +260,11 @@ def main(args=None): make_dir(out_dir) ## Check correct variant caller has been provided - variant_callers = ['ivar', 'bcftools', 'nanopolish', 'medaka'] + variant_callers = ["ivar", "bcftools", "nanopolish", "medaka"] if args.variant_caller not in variant_callers: - logger.error(f"Invalid option '--variant caller {args.variant_caller}'. Valid options: " + ', '.join(variant_callers)) + logger.error( + f"Invalid option '--variant caller {args.variant_caller}'. Valid options: " + ", ".join(variant_callers) + ) sys.exit(1) ## Find files and create a dictionary {'sample': '/path/to/file'} @@ -203,50 +274,52 @@ def main(args=None): ## Check all files are provided for each sample if set(bcftools_files) != set(snpsift_files): - logger.error(f"Number of BCFTools ({len(bcftools_files)}) and SnpSift ({len(snpsift_files)}) files do not match!") + logger.error( + f"Number of BCFTools ({len(bcftools_files)}) and SnpSift ({len(snpsift_files)}) files do not match!" 
+ ) sys.exit(1) else: if pangolin_files: if set(bcftools_files) != set(pangolin_files): - logger.error(f"Number of BCFTools ({len(bcftools_files)}) and Pangolin ({len(pangolin_files)}) files do not match!") + logger.error( + f"Number of BCFTools ({len(bcftools_files)}) and Pangolin ({len(pangolin_files)}) files do not match!" + ) sys.exit(1) ## Create per-sample table and write to file sample_tables = [] for sample in sorted(bcftools_files): - ## Read in BCFTools query file bcftools_table = None - if args.variant_caller == 'ivar': + if args.variant_caller == "ivar": bcftools_table = ivar_bcftools_query_to_table(bcftools_files[sample]) - elif args.variant_caller == 'bcftools': + elif args.variant_caller == "bcftools": bcftools_table = bcftools_bcftools_query_to_table(bcftools_files[sample]) - elif args.variant_caller == 'nanopolish': + elif args.variant_caller == "nanopolish": bcftools_table = nanopolish_bcftools_query_to_table(bcftools_files[sample]) - elif args.variant_caller == 'medaka': + elif args.variant_caller == "medaka": bcftools_table = medaka_bcftools_query_to_table(bcftools_files[sample]) if not bcftools_table.empty: - ## Read in SnpSift file snpsift_table = snpsift_to_table(snpsift_files[sample]) - merged_table = pd.DataFrame(data = bcftools_table) - merged_table.insert(0,'SAMPLE', sample) - merged_table = pd.merge(merged_table, snpsift_table, how='outer') - merged_table['CALLER'] = args.variant_caller + merged_table = pd.DataFrame(data=bcftools_table) + merged_table.insert(0, "SAMPLE", sample) + merged_table = pd.merge(merged_table, snpsift_table, how="outer") + merged_table["CALLER"] = args.variant_caller ## Read in Pangolin lineage file if pangolin_files: - merged_table['LINEAGE'] = get_pangolin_lineage(pangolin_files[sample]) + merged_table["LINEAGE"] = get_pangolin_lineage(pangolin_files[sample]) sample_tables.append(merged_table) ## Merge table across samples if sample_tables: merged_tables = pd.concat(sample_tables) - merged_tables.to_csv(args.output_file, index=False, encoding='utf-8-sig') + merged_tables.to_csv(args.output_file, index=False, encoding="utf-8-sig") -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py index 90a4c21a..5ce8c36d 100755 --- a/bin/multiqc_to_custom_csv.py +++ b/bin/multiqc_to_custom_csv.py @@ -8,7 +8,9 @@ def parse_args(args=None): - Description = "Create custom spreadsheet for pertinent MultiQC metrics generated by the nf-core/viralrecon pipeline." + Description = ( + "Create custom spreadsheet for pertinent MultiQC metrics generated by the nf-core/viralrecon pipeline." 
+ ) Epilog = "Example usage: python multiqc_to_custom_tsv.py" parser = argparse.ArgumentParser(description=Description, epilog=Epilog) parser.add_argument( @@ -58,9 +60,7 @@ def find_tag(d, tag): yield i -def yaml_fields_to_dict( - yaml_file, append_dict={}, field_mapping_list=[], valid_sample_list=[] -): +def yaml_fields_to_dict(yaml_file, append_dict={}, field_mapping_list=[], valid_sample_list=[]): integer_fields = [ "mapped_passed", "number_of_SNPs", @@ -94,9 +94,7 @@ def yaml_fields_to_dict( val = list(find_tag(yaml_dict[k], j[0])) ## Fix for Cutadapt reporting reads/pairs as separate values if j[0] == "r_written" and len(val) == 0: - val = [ - list(find_tag(yaml_dict[k], "pairs_written"))[0] * 2 - ] + val = [list(find_tag(yaml_dict[k], "pairs_written"))[0] * 2] if len(val) != 0: val = val[0] if len(j) == 2: @@ -134,9 +132,7 @@ def yaml_fields_to_dict( return append_dict -def metrics_dict_to_file( - file_field_list, multiqc_data_dir, out_file, valid_sample_list=[] -): +def metrics_dict_to_file(file_field_list, multiqc_data_dir, out_file, valid_sample_list=[]): metrics_dict = {} field_list = [] for yaml_file, mapping_list in file_field_list: @@ -159,7 +155,7 @@ def metrics_dict_to_file( for field in field_list: if field in metrics_dict[k]: if metrics_dict[k][field]: - row_list.append(str(metrics_dict[k][field]).replace(',', ';')) + row_list.append(str(metrics_dict[k][field]).replace(",", ";")) else: row_list.append("NA") else: @@ -186,9 +182,7 @@ def main(args=None): [ ( "% Non-host reads (Kraken 2)", - [ - "PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified" - ], + ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"], ) ], ), @@ -206,9 +200,7 @@ def main(args=None): [ ( "Coverage median", - [ - "VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-median_coverage" - ], + ["VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-median_coverage"], ), ( "% Coverage > 1x", @@ -253,9 +245,7 @@ def main(args=None): [ ( "% Non-host reads (Kraken 2)", - [ - "PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified" - ], + ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"], ) ], ), @@ -347,9 +337,7 @@ def main(args=None): elif args.PLATFORM == "nanopore": ## List of real samples to output in report sample_list = [] - yaml_file = os.path.join( - args.MULTIQC_DATA_DIR, "multiqc_samtools_flagstat.yaml" - ) + yaml_file = os.path.join(args.MULTIQC_DATA_DIR, "multiqc_samtools_flagstat.yaml") if os.path.exists(yaml_file): metrics_dict = yaml_fields_to_dict( yaml_file=yaml_file, diff --git a/conf/base.config b/conf/base.config index 5ecbe848..443a49a1 100644 --- a/conf/base.config +++ b/conf/base.config @@ -19,6 +19,16 @@ process { maxErrors = '-1' // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } withLabel:process_low { cpus = { check_max( 2 * task.attempt, 'cpus' ) } memory = { check_max( 12.GB * task.attempt, 'memory' ) } diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 5cd076ae..05424060 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: ext.args = Additional arguments appended to command in module. ext.args2 = Second set of arguments appended to command in module (multi-tool modules). @@ -39,6 +39,15 @@ process { ] } + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + withName: 'CAT_FASTQ' { publishDir = [ path: { "${params.outdir}/fastq" }, @@ -49,7 +58,7 @@ process { if (!params.skip_fastqc) { process { - withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' { + withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_RAW' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/raw" }, @@ -83,18 +92,11 @@ if (!params.skip_fastp) { ] ] } - - withName: 'MULTIQC_TSV_FAIL_READS' { - publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false - ] - } } if (!params.skip_fastqc) { process { - withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' { + withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_TRIM' { ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/fastqc/trim" }, @@ -162,7 +164,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' { + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -171,7 +173,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' { + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: params.publish_dir_mode, @@ -179,7 +181,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, @@ -187,26 +189,6 @@ if (!params.skip_variants) { pattern: "*.{stats,flagstat,idxstats}" ] } - - withName: 'MULTIQC_TSV_FAIL_MAPPED' { - publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false - ] - } - } - - if (params.protocol == 'amplicon' || !params.skip_asciigenome) { - process { - withName: 'CUSTOM_GETCHROMSIZES' { - publishDir = [ - path: { "${params.outdir}/genome" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, - enabled: params.save_reference - ] - } - } } if (!params.skip_ivar_trim && params.protocol == 'amplicon') { @@ -225,7 +207,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_SORT' { + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_SORT' { ext.prefix = { "${meta.id}.ivar_trim.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, @@ -235,7 +217,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_INDEX' { + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: params.publish_dir_mode, @@ -244,7 +226,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.ivar_trim.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, @@ -277,7 +259,7 @@ if (!params.skip_variants) { ] } - withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { + withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: params.publish_dir_mode, @@ -285,7 +267,7 @@ if (!params.skip_variants) { ] } - withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { ext.prefix = { "${meta.id}.markduplicates.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, @@ -670,13 +652,6 @@ if (!params.skip_variants) { saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } ] } - - withName: 'MULTIQC_TSV_NEXTCLADE' { - publishDir = [ - path: { "${params.outdir}/multiqc" }, - enabled: false - ] - } } } diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 98d06e91..5a4a277f 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: ext.args = Additional arguments appended to command in module. ext.args2 = Second set of arguments appended to command in module (multi-tool modules). @@ -33,13 +33,6 @@ process { ] } - withName: 'MULTIQC_TSV_BARCODE_COUNT|MULTIQC_TSV_GUPPYPLEX_COUNT' { - publishDir = [ - path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" }, - enabled: false - ] - } - withName: 'ARTIC_GUPPYPLEX' { ext.args = params.primer_set_version == 1200 ? 
'--min-length 250 --max-length 1500' : '--min-length 400 --max-length 700' publishDir = [ @@ -120,20 +113,10 @@ process { // Optional configuration options // -if (params.input) { - process { - withName: 'MULTIQC_TSV_NO_.*' { - publishDir = [ - path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" }, - enabled: false - ] - } - } -} - if (params.sequencing_summary && !params.skip_pycoqc) { process { withName: 'PYCOQC' { + ext.prefix = 'pycoqc' publishDir = [ path: { "${params.outdir}/pycoqc" }, mode: params.publish_dir_mode, @@ -243,13 +226,6 @@ if (!params.skip_nextclade) { saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } ] } - - withName: 'MULTIQC_TSV_NEXTCLADE' { - publishDir = [ - path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" }, - enabled: false - ] - } } } diff --git a/conf/test.config b/conf/test.config index 45dad62c..15e93a4b 100644 --- a/conf/test.config +++ b/conf/test.config @@ -20,7 +20,7 @@ params { max_time = '6.h' // Input data to test amplicon analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_illumina.csv' platform = 'illumina' protocol = 'amplicon' primer_set = 'artic' @@ -35,5 +35,4 @@ params { // Assembly options assemblers = 'spades,unicycler,minia' - skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } diff --git a/conf/test_full.config b/conf/test_full.config index 76c17fcb..3c9b7ec4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -15,7 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full test of amplicon analysis - input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_illumina.csv' platform = 'illumina' protocol = 'amplicon' primer_set = 'artic' @@ -29,7 +29,6 @@ params { // Assembly options assemblers = 'spades,unicycler,minia' - skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors } process { diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config index 856246ea..2536ea4b 100644 --- a/conf/test_full_nanopore.config +++ b/conf/test_full_nanopore.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running full-size tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a full size pipeline test. 
Use as follows: @@ -16,15 +16,15 @@ params { // Input data for full test of amplicon analysis platform = 'nanopore' - input = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv' - fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' - fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' - sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_nanopore.csv' + fastq_dir = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/' + fast5_dir = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/' + sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt' // Genome references genome = 'MN908947.3' primer_set_version = 3 // Other parameters - artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' + artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5' } diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config index 7c190905..90bc0842 100644 --- a/conf/test_full_sispa.config +++ b/conf/test_full_sispa.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running full-size tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a full size pipeline test. Use as follows: @@ -15,7 +15,7 @@ params { config_profile_description = 'Full test dataset to check pipeline function' // Input data for full test of SISPA/metagenomics analysis - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_metagenomic_illumina.csv' platform = 'illumina' protocol = 'metagenomic' diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config index 853d4a05..406c8c3e 100644 --- a/conf/test_nanopore.config +++ b/conf/test_nanopore.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for running minimal tests -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines input files and everything required to run a fast and simple pipeline test. 
Use as follows:

@@ -21,15 +21,15 @@ params {

    // Input data to test nanopore analysis
    platform           = 'nanopore'
-   input              = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv'
-   fastq_dir          = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/'
-   fast5_dir          = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/'
-   sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt'
+   input              = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_nanopore.csv'
+   fastq_dir          = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fastq_pass/'
+   fast5_dir          = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fast5_pass/'
+   sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/minion_test/sequencing_summary.txt'

    // Genome references
    genome             = 'MN908947.3'
    primer_set_version = 3

    // Other parameters
-   artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5'
+   artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/minion_test/r941_min_high_g360_model.hdf5'
}
diff --git a/conf/test_sispa.config b/conf/test_sispa.config
index c918e194..d3e39be8 100644
--- a/conf/test_sispa.config
+++ b/conf/test_sispa.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for running minimal tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Defines input files and everything required to run a fast and simple pipeline test.

    Use as follows:

@@ -20,7 +20,7 @@ params {
    max_time   = '6.h'

    // Input data to test SISPA/metagenomics analysis
-   input     = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv'
+   input     = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_metagenomic_illumina.csv'
    platform  = 'illumina'
    protocol  = 'metagenomic'

@@ -33,5 +33,4 @@ params {

    // Assembly options
    assemblers = 'spades,unicycler,minia'
-   skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors
}
diff --git a/docs/images/nf-core-viralrecon_metro_map.svg b/docs/images/nf-core-viralrecon_metro_map.svg
new file mode 100644
index 00000000..38e6792c
--- /dev/null
+++ b/docs/images/nf-core-viralrecon_metro_map.svg
@@ -0,0 +1,7257 @@
+[SVG text content omitted - metro-map diagrams of the pipeline. Illumina panel stages: 1. Pre-processing, 2. Alignment & BAM post-processing, 3. Variant calling, 4. Consensus calling, 5. De novo assembly, 6. Final QC. Nanopore panel stages: 1. Pre-processing, 2. Alignment, variant & consensus calling, 3. Consensus analysis, 4. Variant analysis, 5. Final QC.]
diff --git a/docs/images/nf-core-viralrecon_metro_map_illumina.png b/docs/images/nf-core-viralrecon_metro_map_illumina.png
new file mode 100644
index 00000000..fd5f2928
Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_illumina.png differ
diff --git a/docs/images/nf-core-viralrecon_metro_map_nanopore.png b/docs/images/nf-core-viralrecon_metro_map_nanopore.png
new file mode 100644
index 00000000..5fe0cdf1
Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_nanopore.png differ
diff --git a/docs/usage.md b/docs/usage.md
index b8416d20..1f27af21 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -4,13 +4,17 @@

> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._

+## Pipeline parameters
+
+Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration except for parameters; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+
## Samplesheet format

### Illumina

You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.

-```console
+```bash
--input '[path to samplesheet file]'
```

@@ -176,7 +180,7 @@ nextflow run nf-core/viralrecon \

The typical command for running the pipeline is as follows:

-```console
+```bash
nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome 'MN908947.3' -profile docker
```

@@ -184,9 +188,9 @@ This will launch the pipeline with the `docker` configuration profile. See below

Note that the pipeline will create the following files in your working directory:

-```console
+```bash
work                # Directory containing the nextflow working files
-<OUTDIR>            # Finished results in specified location (defined with --outdir)
+<OUTDIR>            # Finished results in specified location (defined with --outdir)
.nextflow_log       # Log file from Nextflow
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```

@@ -195,7 +199,7 @@ work # Directory containing the nextflow working files

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:

-```console
+```bash
nextflow pull nf-core/viralrecon
```

@@ -203,9 +207,9 @@ nextflow pull nf-core/viralrecon

It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.

-First, go to the [nf-core/viralrecon releases page](https://github.com/nf-core/viralrecon/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`.
+First, go to the [nf-core/viralrecon releases page](https://github.com/nf-core/viralrecon/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.

-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future - for example, at the bottom of the MultiQC reports.

## Core Nextflow arguments

@@ -215,7 +219,7 @@ This version number will be logged in reports when you run the pipeline, so that

### `-profile`

Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.

-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.

> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.

@@ -224,8 +228,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c

Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles.

-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment.

+- `test`
+  - A profile with a complete configuration for automated testing
+  - Includes links to test data so needs no other parameters (see the example command just after this list)
- `docker`
  - A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
  - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/)
- `podman`
  - A generic configuration profile to be used with [Podman](https://podman.io/)
- `shifter`
  - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/)
- `charliecloud`
  - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `conda`
  - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
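As a quick, hedged illustration of the `test` profile referenced above (the output directory is a placeholder of your choosing), a minimal end-to-end check of the pipeline might look like:

```bash
# Combine the bundled test profile with a container profile;
# the test profile supplies its own input data, so no other parameters are needed.
nextflow run nf-core/viralrecon -profile test,docker --outdir <OUTDIR>
```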
-- `test`
-  - A profile with a complete configuration for automated testing
-  - Includes links to test data so needs no other parameters

### `-resume`

@@ -289,8 +293,14 @@ Work dir:

Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
```

+#### For beginners
+
+As a first step to bypass this error, you can try to increase the amount of CPUs, memory and time available to the whole pipeline via the parameters `--max_cpus`, `--max_memory` and `--max_time`. Based on the error above, you would need to increase the amount of memory. To find the default value for `--max_memory`, go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button; in this case the default is 128GB. You can then try to run your pipeline again with `--max_memory 200GB -resume` to skip all processes that have already completed successfully. If you cannot increase the resources for the whole pipeline, you can try to adapt the resources for a single process as described below.
+
+#### Advanced option on process level
+
To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN).
-We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`.
+We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`.
If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9).
The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure a subset of processes having similar computing requirements.
The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB.

@@ -309,9 +319,10 @@ process {

>
> If you get a warning suggesting that the process selector isn't recognised, check that the process name has been specified correctly.

-### Updating containers
+### Updating containers (advanced users)

The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration.
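A minimal sketch of what such an override can look like in practice - the process selector `PANGOLIN` and the container tag below are illustrative assumptions, so substitute the real process name from the pipeline's module definitions and the exact container version you need:

```bash
# Write a small custom config that overrides one process's container,
# then pass it to Nextflow with '-c'. Selector and tag are placeholders.
cat > custom.config <<'EOF'
process {
    withName: 'PANGOLIN' {
        container = 'quay.io/biocontainers/pangolin:<tag>'
    }
}
EOF

nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome 'MN908947.3' -profile docker -c custom.config
```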
+For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to sequenced SARS-CoV-2 samples. Given that the lineage assignments change quite frequently, it doesn't make sense to re-release nf-core/viralrecon every time a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.

#### Pangolin

@@ -413,6 +424,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config

If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).

+## Azure Resource Requests
+
+Use the `azurebatch` profile by specifying `-profile azurebatch`.
+We recommend setting `params.vm_type` to `Standard_D16_v3` VMs by default, but this can be changed if required.
+
+Note that the choice of VM size depends on your quota and the overall workload during the analysis.
+For a thorough list, please refer to the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
+
## Running in the background

Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.

@@ -427,6 +446,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo

In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`):

-```console
+```bash
NXF_OPTS='-Xms1g -Xmx4g'
```
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
index b3d092f8..33cd4f6e 100755
--- a/lib/NfcoreSchema.groovy
+++ b/lib/NfcoreSchema.groovy
@@ -46,7 +46,6 @@ class NfcoreSchema {
        'quiet',
        'syslog',
        'v',
-       'version',

        // Options for `nextflow run` command
        'ansi',
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index d244008f..08d2922c 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -32,6 +32,40 @@ class NfcoreTemplate {
        }
    }

+   //
+   // Warn if using custom configs to provide pipeline parameters
+   //
+   public static void warnParamsProvidedInConfig(workflow, log) {
+       if (workflow.configFiles.size() > 1) {
+           log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+               "  Multiple config files detected!\n" +
+               "  Please provide pipeline parameters via the CLI or Nextflow '-params-file' option.\n" +
+               "  Custom config files including those provided by the '-c' Nextflow option can be\n" +
+               "  used to provide any configuration except for parameters.\n\n" +
+               "  Docs: https://nf-co.re/usage/configuration#custom-configuration-files\n" +
+               "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+       }
+   }
+
+   //
+   // Generate version string
+   //
+   public static String version(workflow) {
+       String version_string = ""
+
+       if (workflow.manifest.version) {
+           def prefix_v = workflow.manifest.version[0] != 'v' ?
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string + } + // // Construct and send completion email // @@ -64,7 +98,7 @@ class NfcoreTemplate { misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp def email_fields = [:] - email_fields['version'] = workflow.manifest.version + email_fields['version'] = NfcoreTemplate.version(workflow) email_fields['runName'] = workflow.runName email_fields['success'] = workflow.success email_fields['dateComplete'] = workflow.complete @@ -150,6 +184,64 @@ class NfcoreTemplate { output_tf.withWriter { w -> w << email_txt } } + // + // Construct and send a notification to a web server as JSON + // e.g. Microsoft Teams and Slack + // + public static void IM_notification(workflow, params, summary_params, projectDir, log) { + def hook_url = params.hook_url + + def summary = [:] + for (group in summary_params.keySet()) { + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) misc_fields['repository'] = workflow.repository + if (workflow.commitId) misc_fields['commitid'] = workflow.commitId + if (workflow.revision) misc_fields['revision'] = workflow.revision + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = NfcoreTemplate.version(workflow) + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("$projectDir/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection(); + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")); + def postRC = post.getResponseCode(); + if (! 
postRC.equals(200)) { + log.warn(post.getErrorStream().getText()); + } + } + // // Print pipeline summary on completion // @@ -182,7 +274,7 @@ class NfcoreTemplate { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" + log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" } } else { log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" @@ -270,6 +362,7 @@ class NfcoreTemplate { // public static String logo(workflow, monochrome_logs) { Map colors = logColours(monochrome_logs) + String workflow_version = NfcoreTemplate.version(workflow) String.format( """\n ${dashedLine(monochrome_logs)} @@ -278,7 +371,7 @@ class NfcoreTemplate { ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset} ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset} ${colors.green}`._,._,\'${colors.reset} - ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset} + ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} ${dashedLine(monochrome_logs)} """.stripIndent() ) diff --git a/lib/Utils.groovy b/lib/Utils.groovy old mode 100755 new mode 100644 index 28567bd7..fd6095c7 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -21,20 +21,27 @@ class Utils { } // Check that all channels are present - def required_channels = ['conda-forge', 'bioconda', 'defaults'] - def conda_check_failed = !required_channels.every { ch -> ch in channels } + // This channel list is ordered by required channel priority. 
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean // Check that they are in the right order - conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda')) - conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) + def channel_priority_violation = false + def n = required_channels_in_order.size() + for (int i = 0; i < n - 1; i++) { + channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + } - if (conda_check_failed) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + if (channels_missing | channel_priority_violation) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + - " NB: The order of the channels matters!\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + " Please refer to https://bioconda.github.io/\n" + + " The observed channel order is \n" + + " ${channels}\n" + + " but the following channel order is required:\n" + + " ${required_channels_in_order}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } } diff --git a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy index 5672a1cf..a1ec9776 100755 --- a/lib/WorkflowCommons.groovy +++ b/lib/WorkflowCommons.groovy @@ -9,11 +9,11 @@ class WorkflowCommons { // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + - "=============================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" System.exit(1) } } @@ -45,6 +45,18 @@ class WorkflowCommons { return yaml_file_text } + // + // Create MultiQC tsv custom content from a list of values + // + public static String multiqcTsvFromList(tsv_data, header) { + def tsv_string = "" + if (tsv_data.size() > 0) { + tsv_string += "${header.join('\t')}\n" + tsv_string += tsv_data.join('\n') + } + return tsv_string + } + // // Function to check whether primer BED file has the correct suffixes as provided to the pipeline // @@ -62,14 +74,14 @@ class WorkflowCommons { ) } if (total != (left + right)) { - log.warn "=============================================================================\n" + + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Please check the name field (column 4) in the file supplied via --primer_bed.\n\n" + " All of the values in that column do not end with those supplied by:\n" + " --primer_left_suffix : $primer_left_suffix\n" + " --primer_right_suffix: $primer_right_suffix\n\n" + " This information is required to collapse the primer intervals into amplicons\n" + " for the 
coverage plots generated by the pipeline.\n" + - "===================================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } @@ -109,13 +121,13 @@ class WorkflowCommons { def intersect = bed_contigs.intersect(fai_contigs) if (intersect.size() != bed_contigs.size()) { def diff = bed_contigs.minus(intersect).sort() - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Contigs in primer BED file do not match those in the reference genome:\n\n" + " ${diff.join('\n ')}\n\n" + " Please check:\n" + " - Primer BED file supplied with --primer_bed\n" + " - Genome FASTA file supplied with --fasta\n" + - "=============================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" System.exit(1) } } diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy index 56b20612..f151b2e1 100755 --- a/lib/WorkflowIllumina.groovy +++ b/lib/WorkflowIllumina.groovy @@ -75,11 +75,11 @@ class WorkflowIllumina { if (line.contains('>')) { count++ if (count > 1) { - log.warn "=============================================================================\n" + + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " This pipeline does not officially support multi-fasta genome files!\n\n" + " The parameters and processes are tailored for viral genome analysis.\n" + " Please amend the '--fasta' parameter.\n" + - "===================================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" break } } @@ -118,12 +118,12 @@ class WorkflowIllumina { if (name.contains(name_prefix)) { count++ if (count > 1) { - log.warn "=============================================================================\n" + + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Found '${name_prefix}' in the name field of the primer BED file!\n" + " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + " If so, please set '--ivar_trim_offset 5' as suggested in the issue below:\n" + " https://github.com/nf-core/viralrecon/issues/170\n" + - "===================================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" break } } diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index 9778e073..64147d3d 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -18,7 +18,7 @@ class WorkflowMain { } // - // Print help to screen if required + // Generate help string // public static String help(workflow, params, log) { def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir --genome 'MN908947.3' -profile docker" @@ -31,7 +31,7 @@ class WorkflowMain { } // - // Print parameter summary log to screen + // Generate parameter summary log string // public static String paramsSummaryLog(workflow, params, log) { def summary_log = '' @@ -52,19 +52,29 @@ class WorkflowMain { System.exit(0) } - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) + // Print workflow version and exit on --version + if (params.version) { + String 
workflow_version = NfcoreTemplate.version(workflow) + log.info "${workflow.manifest.name} ${workflow_version}" + System.exit(0) } // Print parameter summary log to screen log.info paramsSummaryLog(workflow, params, log) + // Warn about using custom configs to provide pipeline parameters + NfcoreTemplate.warnParamsProvidedInConfig(workflow, log) + + // Validate workflow parameters via the JSON schema + if (params.validate_params) { + NfcoreSchema.validateParameters(workflow, params, log) + } + // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) // Check that conda channels are set-up correctly - if (params.enable_conda) { + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { Utils.checkCondaChannels(log) } @@ -89,7 +99,6 @@ class WorkflowMain { } } } - // // Get attribute from genome config file e.g. fasta // @@ -99,7 +108,7 @@ class WorkflowMain { " - https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config\n\n" + " If you would still like to blame us please come and find us on nf-core Slack:\n" + " - https://nf-co.re/viralrecon#contributions-and-support\n" + - "=============================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { def genome_map = params.genomes[ params.genome ] if (primer_set) { @@ -111,7 +120,7 @@ class WorkflowMain { if (genome_map.containsKey(primer_set_version)) { genome_map = genome_map[ primer_set_version ] } else { - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " --primer_set_version '${primer_set_version}' not found!\n\n" + " Currently, the available primer set version keys are: ${genome_map.keySet().join(", ")}\n\n" + " Please check:\n" + @@ -122,7 +131,7 @@ class WorkflowMain { System.exit(1) } } else { - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " --primer_set '${primer_set}' not found!\n\n" + " Currently, the available primer set keys are: ${genome_map.keySet().join(", ")}\n\n" + " Please check:\n" + @@ -132,7 +141,7 @@ class WorkflowMain { System.exit(1) } } else { - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' does not contain any primer sets!\n\n" + " Please check:\n" + " - The value provided to --genome (currently '${params.genome}')\n" + diff --git a/main.nf b/main.nf index 4d662339..c659b05c 100644 --- a/main.nf +++ b/main.nf @@ -4,6 +4,7 @@ nf-core/viralrecon ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/viralrecon + Website: https://nf-co.re/viralrecon Slack : https://nfcore.slack.com/channels/viralrecon ---------------------------------------------------------------------------------------- diff --git a/modules.json b/modules.json index 97603779..4e7dec6f 100644 --- a/modules.json +++ b/modules.json @@ -2,159 +2,289 @@ "name": "nf-core/viralrecon", "homePage": "https://github.com/nf-core/viralrecon", "repos": { - "nf-core/modules": 
{ - "abacas": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "artic/guppyplex": { - "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" - }, - "artic/minion": { - "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" - }, - "bandage/image": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bcftools/consensus": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/filter": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/mpileup": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/norm": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/query": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/sort": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bcftools/stats": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "bedtools/getfasta": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/maskfasta": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bedtools/merge": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "blast/blastn": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "blast/makeblastdb": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "bowtie2/align": { - "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" - }, - "bowtie2/build": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "cat/fastq": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" - }, - "custom/dumpsoftwareversions": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "custom/getchromsizes": { - "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" - }, - "fastp": { - "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" - }, - "fastqc": { - "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" - }, - "gunzip": { - "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" - }, - "ivar/consensus": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ivar/trim": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "ivar/variants": { - "git_sha": "cab399507bea60d90de6d7b296163210c371b693" - }, - "kraken2/kraken2": { - "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" - }, - "minia": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "mosdepth": { - "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098" - }, - "nanoplot": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "nextclade/datasetget": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "nextclade/run": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "pangolin": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "picard/collectmultiplemetrics": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "picard/markduplicates": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "plasmidid": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "pycoqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "quast": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "samtools/flagstat": { - "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" - }, - "samtools/idxstats": { - "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" - }, - "samtools/index": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - "samtools/sort": { - "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" - }, - 
"samtools/stats": { - "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" - }, - "samtools/view": { - "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" - }, - "spades": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" - }, - "tabix/bgzip": { - "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" - }, - "tabix/tabix": { - "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484" - }, - "unicycler": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" - }, - "untar": { - "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" - }, - "vcflib/vcfuniq": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + "https://github.com/nf-core/modules.git": { + "modules": { + "nf-core": { + "abacas": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "artic/guppyplex": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "artic/minion": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "bandage/image": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "bcftools/consensus": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bcftools/filter": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bcftools/mpileup": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a", + "installed_by": ["modules"] + }, + "bcftools/query": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81", + "installed_by": ["modules"] + }, + "bcftools/stats": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bedtools/getfasta": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bedtools/maskfasta": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "bedtools/merge": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "blast/blastn": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "blast/makeblastdb": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules", "fastq_align_bowtie2"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": 
"c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "20a508676f40d0fd3f911ac595af91ec845704c4", + "installed_by": ["modules"] + }, + "fastqc": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "gunzip": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "ivar/consensus": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "ivar/trim": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "ivar/variants": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "kraken2/kraken2": { + "branch": "master", + "git_sha": "7c695e0147df1157413e06246d9b0094617d3e6b", + "installed_by": ["modules"] + }, + "minia": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": "def5f182583df0c20f43ec3d4355e8ebd341aaa9", + "installed_by": ["modules"] + }, + "nanoplot": { + "branch": "master", + "git_sha": "3822e04e49b6d89b7092feb3480d744cb5d9986b", + "installed_by": ["modules"] + }, + "nextclade/datasetget": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "nextclade/run": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "pangolin": { + "branch": "master", + "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6", + "installed_by": ["modules", "bam_markduplicates_picard"] + }, + "plasmidid": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "pycoqc": { + "branch": "master", + "git_sha": "cb8a5428685f490d0295563b1b0c3a239bbe1927", + "installed_by": ["modules"] + }, + "quast": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules", "bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules", "bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["bam_markduplicates_picard", "modules", "bam_sort_stats_samtools"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules", "bam_sort_stats_samtools"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules", "bam_stats_samtools"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + }, + "spades": { + "branch": "master", + "git_sha": 
"b6d4d476aee074311c89d82a69c1921bd70c8180", + "installed_by": ["modules"] + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "90294980a903ecebd99ac31d8b6c66af48fa8259", + "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "unicycler": { + "branch": "master", + "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7", + "installed_by": ["modules"] + }, + "vcflib/vcfuniq": { + "branch": "master", + "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b", + "installed_by": ["modules"] + } + } + }, + "subworkflows": { + "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "6f1697c121719dedde9e0537b6ed6a9cb8c13583", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2", + "installed_by": ["fastq_align_bowtie2"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e", + "installed_by": ["bam_sort_stats_samtools", "bam_markduplicates_picard"] + }, + "fastq_align_bowtie2": { + "branch": "master", + "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf index 09ca6bb6..adf8cf14 100644 --- a/modules/local/asciigenome.nf +++ b/modules/local/asciigenome.nf @@ -2,7 +2,7 @@ process ASCIIGENOME { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::asciigenome=1.16.0 bioconda::bedtools=2.30.0" : null) + conda "bioconda::asciigenome=1.16.0 bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' : 'quay.io/biocontainers/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' }" diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf index 316d1e2a..4219b6ae 100644 --- a/modules/local/collapse_primers.nf +++ b/modules/local/collapse_primers.nf @@ -2,7 +2,7 @@ process COLLAPSE_PRIMERS { tag "$bed" label 'process_medium' - conda (params.enable_conda ? "conda-forge::python=3.9.5" : null) + conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.9--1' : 'quay.io/biocontainers/python:3.9--1' }" diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf index a96ea8bb..11c8f6a5 100644 --- a/modules/local/cutadapt.nf +++ b/modules/local/cutadapt.nf @@ -2,10 +2,10 @@ process CUTADAPT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::cutadapt=3.5' : null) + conda "bioconda::cutadapt=4.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/cutadapt:3.5--py39h38f01e4_0' : - 'quay.io/biocontainers/cutadapt:3.5--py39h38f01e4_0' }" + 'https://depot.galaxyproject.org/singularity/cutadapt:4.2--py39hbf8eff0_0' : + 'quay.io/biocontainers/cutadapt:4.2--py39hbf8eff0_0' }" input: tuple val(meta), path(reads) diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf index 5e5ed81b..0328a037 100644 --- a/modules/local/filter_blastn.nf +++ b/modules/local/filter_blastn.nf @@ -2,7 +2,7 @@ process FILTER_BLASTN { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'ubuntu:20.04' }" diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index cd220b24..e6c88328 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -1,7 +1,7 @@ process IVAR_VARIANTS_TO_VCF { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null) + conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3 conda-forge::biopython=1.79" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' : 'quay.io/biocontainers/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' }" diff --git a/modules/local/kraken2_build.nf b/modules/local/kraken2_build.nf index c5632aad..f1aa6865 100644 --- a/modules/local/kraken2_build.nf +++ b/modules/local/kraken2_build.nf @@ -2,7 +2,7 @@ process KRAKEN2_BUILD { tag "$library" label 'process_high' - conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null) + conda "bioconda::kraken2=2.1.2 conda-forge::pigz=2.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' : 'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }" diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf index 246a8ec5..c8c75bff 100644 --- a/modules/local/make_bed_mask.nf +++ b/modules/local/make_bed_mask.nf @@ -1,7 +1,7 @@ process MAKE_BED_MASK { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::python=3.9.5 bioconda::samtools=1.14" : null) + conda "conda-forge::python=3.9.5 bioconda::samtools=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' : 'quay.io/biocontainers/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' }" diff --git a/modules/local/make_variants_long_table.nf b/modules/local/make_variants_long_table.nf index d802ecc9..1d8b40fc 100644 --- a/modules/local/make_variants_long_table.nf +++ b/modules/local/make_variants_long_table.nf @@ -1,6 +1,6 @@ process MAKE_VARIANTS_LONG_TABLE { - conda (params.enable_conda ? "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null) + conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' : 'quay.io/biocontainers/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' }" diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf index 59a031c2..bcfc37f1 100644 --- a/modules/local/multiqc_illumina.nf +++ b/modules/local/multiqc_illumina.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf index e23db35c..c6ffbb4d 100644 --- a/modules/local/multiqc_nanopore.nf +++ b/modules/local/multiqc_nanopore.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) + conda "bioconda::multiqc=1.14" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : - 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/multiqc_tsv_from_list.nf b/modules/local/multiqc_tsv_from_list.nf deleted file mode 100644 index a53c8f73..00000000 --- a/modules/local/multiqc_tsv_from_list.nf +++ /dev/null @@ -1,28 +0,0 @@ -process MULTIQC_TSV_FROM_LIST { - - executor 'local' - memory 100.MB - - input: - val tsv_data // [ ['foo', 1], ['bar', 1] ] - val header // [ 'name', 'number' ] - val out_prefix - - output: - path "*.tsv" - - when: - task.ext.when == null || task.ext.when - - exec: - // Generate file contents - def contents = "" - if (tsv_data.size() > 0) { - contents += "${header.join('\t')}\n" - contents += tsv_data.join('\n') - } - - // Write to file - def mqc_file = task.workDir.resolve("${out_prefix}_mqc.tsv") - mqc_file.text = contents -} diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf index 97e7e93e..1932350a 100644 --- a/modules/local/plot_base_density.nf +++ b/modules/local/plot_base_density.nf @@ -2,7 +2,7 @@ process PLOT_BASE_DENSITY { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' : 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf index 31cbeeb3..0195549b 100644 --- a/modules/local/plot_mosdepth_regions.nf +++ b/modules/local/plot_mosdepth_regions.nf @@ -1,7 +1,7 @@ process PLOT_MOSDEPTH_REGIONS { label 'process_medium' - conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
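
A few hunks up, MULTIQC_TSV_FROM_LIST is deleted outright. It was a native `exec:` process (no container, 100 MB, local executor) whose only job was to join a header row and a list of value rows into a MultiQC custom-content TSV; logic that small fits better in a plain Groovy helper than in a scheduled process. A sketch of such a helper — the function name and where it would live are our assumption, not something this diff shows:

    // Hypothetical replacement for the deleted process: returns the same TSV
    // text that MULTIQC_TSV_FROM_LIST wrote to "${out_prefix}_mqc.tsv".
    public static String multiqcTsvFromList(List tsv_data, List header) {
        def contents = ""
        if (tsv_data.size() > 0) {
            contents += "${header.join('\t')}\n"
            contents += tsv_data.join('\n')
        }
        return contents
    }
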
'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' : 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" diff --git a/modules/local/rename_fasta_header.nf b/modules/local/rename_fasta_header.nf index 36810983..d42e5f11 100644 --- a/modules/local/rename_fasta_header.nf +++ b/modules/local/rename_fasta_header.nf @@ -1,7 +1,7 @@ process RENAME_FASTA_HEADER { tag "$meta.id" - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'ubuntu:20.04' }" diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf index ffed8149..0da5f587 100644 --- a/modules/local/samplesheet_check.nf +++ b/modules/local/samplesheet_check.nf @@ -1,7 +1,8 @@ process SAMPLESHEET_CHECK { tag "$samplesheet" + label 'process_single' - conda (params.enable_conda ? "conda-forge::python=3.9.5" : null) + conda "conda-forge::python=3.9.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/python:3.9--1' : 'quay.io/biocontainers/python:3.9--1' }" diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index 4ccf4db4..c834c1c5 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -2,7 +2,7 @@ process SNPEFF_ANN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda "bioconda::snpeff=5.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index faaeba19..e1ab367f 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -2,7 +2,7 @@ process SNPEFF_BUILD { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda "bioconda::snpeff=5.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 573063e0..5654e97e 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -2,7 +2,7 @@ process SNPSIFT_EXTRACTFIELDS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null) + conda "bioconda::snpsift=4.3.1t" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' : 'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }" diff --git a/modules/nf-core/modules/abacas/main.nf b/modules/nf-core/abacas/main.nf similarity index 94% rename from modules/nf-core/modules/abacas/main.nf rename to modules/nf-core/abacas/main.nf index 00c9169f..beabc45b 100644 --- a/modules/nf-core/modules/abacas/main.nf +++ b/modules/nf-core/abacas/main.nf @@ -2,7 +2,7 @@ process ABACAS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::abacas=1.3.1" : null) + conda "bioconda::abacas=1.3.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/abacas:1.3.1--pl526_0' : 'quay.io/biocontainers/abacas:1.3.1--pl526_0' }" diff --git a/modules/nf-core/modules/abacas/meta.yml b/modules/nf-core/abacas/meta.yml similarity index 98% rename from modules/nf-core/modules/abacas/meta.yml rename to modules/nf-core/abacas/meta.yml index c685e650..3bab9b22 100644 --- a/modules/nf-core/modules/abacas/meta.yml +++ b/modules/nf-core/abacas/meta.yml @@ -12,7 +12,7 @@ tools: contigs based on a reference sequence. homepage: http://abacas.sourceforge.net/documentation.html documentation: http://abacas.sourceforge.net/documentation.html - tool_dev_url: None + doi: "10.1093/bioinformatics/btp347" licence: ["GPL v2-or-later"] diff --git a/modules/nf-core/modules/artic/guppyplex/main.nf b/modules/nf-core/artic/guppyplex/main.nf similarity index 77% rename from modules/nf-core/modules/artic/guppyplex/main.nf rename to modules/nf-core/artic/guppyplex/main.nf index 2fd518e0..9be33484 100644 --- a/modules/nf-core/modules/artic/guppyplex/main.nf +++ b/modules/nf-core/artic/guppyplex/main.nf @@ -2,10 +2,10 @@ process ARTIC_GUPPYPLEX { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) + conda "bioconda::artic=1.2.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : - 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.3--pyhdfd78af_0' }" input: tuple val(meta), path(fastq_dir) @@ -20,7 +20,7 @@ process ARTIC_GUPPYPLEX { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. 
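
The WARN comment above exists because `artic --version` mis-reports the tool's version, so the module writes the pinned VERSION string into versions.yml itself instead of parsing CLI output; the string therefore has to be bumped in lockstep with the conda and container pins. Simplified from the pattern these modules use at the end of their script blocks:

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        artic: $VERSION
    END_VERSIONS
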
""" artic \\ guppyplex \\ diff --git a/modules/nf-core/modules/artic/guppyplex/meta.yml b/modules/nf-core/artic/guppyplex/meta.yml similarity index 98% rename from modules/nf-core/modules/artic/guppyplex/meta.yml rename to modules/nf-core/artic/guppyplex/meta.yml index fe288289..e8edc8f3 100644 --- a/modules/nf-core/modules/artic/guppyplex/meta.yml +++ b/modules/nf-core/artic/guppyplex/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://artic.readthedocs.io/en/latest/ documentation: https://artic.readthedocs.io/en/latest/ tool_dev_url: https://github.com/artic-network/fieldbioinformatics - doi: "" + licence: ["MIT"] input: diff --git a/modules/nf-core/modules/artic/minion/main.nf b/modules/nf-core/artic/minion/main.nf similarity index 92% rename from modules/nf-core/modules/artic/minion/main.nf rename to modules/nf-core/artic/minion/main.nf index 1629d433..429a107d 100644 --- a/modules/nf-core/modules/artic/minion/main.nf +++ b/modules/nf-core/artic/minion/main.nf @@ -2,10 +2,10 @@ process ARTIC_MINION { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) + conda "bioconda::artic=1.2.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : - 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.3--pyhdfd78af_0' }" input: tuple val(meta), path(fastq) @@ -48,7 +48,7 @@ process ARTIC_MINION { model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string" } def hd5_plugin_path = task.ext.hd5_plugin_path ? "export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin" - def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. """ $hd5_plugin_path diff --git a/modules/nf-core/modules/artic/minion/meta.yml b/modules/nf-core/artic/minion/meta.yml similarity index 99% rename from modules/nf-core/modules/artic/minion/meta.yml rename to modules/nf-core/artic/minion/meta.yml index c0f97a0c..8ccf8434 100644 --- a/modules/nf-core/modules/artic/minion/meta.yml +++ b/modules/nf-core/artic/minion/meta.yml @@ -11,7 +11,7 @@ tools: homepage: https://artic.readthedocs.io/en/latest/ documentation: https://artic.readthedocs.io/en/latest/ tool_dev_url: https://github.com/artic-network/fieldbioinformatics - doi: "" + licence: ["MIT"] input: - meta: @@ -65,7 +65,7 @@ output: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - results: - type: + type: file description: Aggregated FastQ files pattern: "*.fastq.gz" - bam: diff --git a/modules/nf-core/modules/bandage/image/main.nf b/modules/nf-core/bandage/image/main.nf similarity index 93% rename from modules/nf-core/modules/bandage/image/main.nf rename to modules/nf-core/bandage/image/main.nf index ee504a12..e4da7336 100644 --- a/modules/nf-core/modules/bandage/image/main.nf +++ b/modules/nf-core/bandage/image/main.nf @@ -2,7 +2,7 @@ process BANDAGE_IMAGE { tag "${meta.id}" label 'process_low' - conda (params.enable_conda ? 
'bioconda::bandage=0.8.1' : null) + conda "bioconda::bandage=0.8.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2' : 'quay.io/biocontainers/bandage:0.8.1--hc9558a2_2' }" diff --git a/modules/nf-core/bandage/image/meta.yml b/modules/nf-core/bandage/image/meta.yml new file mode 100644 index 00000000..e68d8c98 --- /dev/null +++ b/modules/nf-core/bandage/image/meta.yml @@ -0,0 +1,44 @@ +name: bandage_image +description: Render an assembly graph in GFA 1.0 format to PNG and SVG image formats +keywords: + - gfa + - graph + - assembly + - visualisation +tools: + - bandage: + description: | + Bandage - a Bioinformatics Application for Navigating De novo Assembly Graphs Easily + homepage: https://github.com/rrwick/Bandage + documentation: https://github.com/rrwick/Bandage + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gfa: + type: file + description: Assembly graph in GFA 1.0 format + pattern: "*.gfa" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - png: + type: file + description: Bandage image in PNG format + pattern: "*.png" + - svg: + type: file + description: Bandage image in SVG format + pattern: "*.svg" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" diff --git a/modules/nf-core/modules/bcftools/consensus/main.nf b/modules/nf-core/bcftools/consensus/main.nf similarity index 85% rename from modules/nf-core/modules/bcftools/consensus/main.nf rename to modules/nf-core/bcftools/consensus/main.nf index e28dc7f4..a32d94b1 100644 --- a/modules/nf-core/modules/bcftools/consensus/main.nf +++ b/modules/nf-core/bcftools/consensus/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONSENSUS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.16" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" input: tuple val(meta), path(vcf), path(tbi), path(fasta) diff --git a/modules/nf-core/modules/bcftools/consensus/meta.yml b/modules/nf-core/bcftools/consensus/meta.yml similarity index 100% rename from modules/nf-core/modules/bcftools/consensus/meta.yml rename to modules/nf-core/bcftools/consensus/meta.yml diff --git a/modules/nf-core/modules/bcftools/norm/main.nf b/modules/nf-core/bcftools/filter/main.nf similarity index 68% rename from modules/nf-core/modules/bcftools/norm/main.nf rename to modules/nf-core/bcftools/filter/main.nf index 96f306bc..4e02009d 100644 --- a/modules/nf-core/modules/bcftools/norm/main.nf +++ b/modules/nf-core/bcftools/filter/main.nf @@ -1,19 +1,18 @@ -process BCFTOOLS_NORM { +process BCFTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.16" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
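
All of these bcftools modules drop the `params.enable_conda` guard for the same reason: whether conda is in play is now derived from the run's profiles, exactly as the `WorkflowMain.initialise` change at the top of this diff does it. Extracted as a stand-alone Groovy check, assuming `-profile` arrives as the usual comma-separated string:

    // True when any active profile is conda or mamba, e.g. "-profile test,conda"
    def condaProfileActive = workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1
    if (condaProfileActive) {
        Utils.checkCondaChannels(log)   // warn if bioconda/conda-forge priority is wrong
    }
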
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" input: - tuple val(meta), path(vcf), path(tbi) - path(fasta) + tuple val(meta), path(vcf) output: - tuple val(meta), path("*.gz") , emit: vcf - path "versions.yml" , emit: versions + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -22,12 +21,10 @@ process BCFTOOLS_NORM { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - bcftools norm \\ - --fasta-ref ${fasta} \\ + bcftools filter \\ --output ${prefix}.vcf.gz \\ $args \\ - --threads $task.cpus \\ - ${vcf} + $vcf cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -37,6 +34,7 @@ process BCFTOOLS_NORM { stub: def prefix = task.ext.prefix ?: "${meta.id}" + """ touch ${prefix}.vcf.gz diff --git a/modules/nf-core/modules/bcftools/filter/meta.yml b/modules/nf-core/bcftools/filter/meta.yml similarity index 100% rename from modules/nf-core/modules/bcftools/filter/meta.yml rename to modules/nf-core/bcftools/filter/meta.yml diff --git a/modules/nf-core/modules/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf similarity index 66% rename from modules/nf-core/modules/bcftools/mpileup/main.nf rename to modules/nf-core/bcftools/mpileup/main.nf index b7795bfc..c9e42c4d 100644 --- a/modules/nf-core/modules/bcftools/mpileup/main.nf +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -2,22 +2,22 @@ process BCFTOOLS_MPILEUP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.16" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(bam), path(intervals) path fasta val save_mpileup output: - tuple val(meta), path("*.gz") , emit: vcf - tuple val(meta), path("*.tbi") , emit: tbi - tuple val(meta), path("*stats.txt"), emit: stats - tuple val(meta), path("*.mpileup") , emit: mpileup, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*vcf.gz") , emit: vcf + tuple val(meta), path("*vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*stats.txt") , emit: stats + tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -28,6 +28,8 @@ process BCFTOOLS_MPILEUP { def args3 = task.ext.args3 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" + def bgzip_mpileup = save_mpileup ? "bgzip ${prefix}.mpileup" : "" + def intervals = intervals ? 
"-T ${intervals}" : "" """ echo "${meta.id}" > sample_name.list @@ -36,11 +38,14 @@ process BCFTOOLS_MPILEUP { --fasta-ref $fasta \\ $args \\ $bam \\ + $intervals \\ $mpileup \\ | bcftools call --output-type v $args2 \\ | bcftools reheader --samples sample_name.list \\ | bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3 + $bgzip_mpileup + tabix -p vcf -f ${prefix}.vcf.gz bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt diff --git a/modules/nf-core/modules/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml similarity index 83% rename from modules/nf-core/modules/bcftools/mpileup/meta.yml rename to modules/nf-core/bcftools/mpileup/meta.yml index d10dac14..5619a6f5 100644 --- a/modules/nf-core/modules/bcftools/mpileup/meta.yml +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -22,6 +22,9 @@ input: type: file description: Input BAM file pattern: "*.{bam}" + - intervals: + type: file + description: Input intervals file. A file (commonly '.bed') containing regions to subset - fasta: type: file description: FASTA reference file @@ -29,7 +32,6 @@ input: - save_mpileup: type: boolean description: Save mpileup file generated by bcftools mpileup - patter: "*.mpileup" output: - meta: type: map @@ -43,11 +45,15 @@ output: - tbi: type: file description: tabix index file - pattern: "*.{tbi}" + pattern: "*.{vcf.gz.tbi}" - stats: type: file description: Text output file containing stats pattern: "*{stats.txt}" + - mpileup: + type: file + description: mpileup gzipped output for all positions + pattern: "{*.mpileup.gz}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..90387d6c --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_NORM { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bcftools=1.16" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension}\\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf.gz" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml similarity index 86% rename from modules/nf-core/modules/bcftools/norm/meta.yml rename to modules/nf-core/bcftools/norm/meta.yml index 2b3c8eae..c3ea2c03 100644 --- a/modules/nf-core/modules/bcftools/norm/meta.yml +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -42,8 +42,8 @@ output: e.g. [ id:'test', single_end:false ] - vcf: type: file - description: VCF normalized output file - pattern: "*.vcf.gz" + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf similarity index 88% rename from modules/nf-core/modules/bcftools/query/main.nf rename to modules/nf-core/bcftools/query/main.nf index 5de34a9e..5a917b3e 100644 --- a/modules/nf-core/modules/bcftools/query/main.nf +++ b/modules/nf-core/bcftools/query/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_QUERY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) + conda "bioconda::bcftools=1.16" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/modules/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml similarity index 100% rename from modules/nf-core/modules/bcftools/query/meta.yml rename to modules/nf-core/bcftools/query/meta.yml diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 00000000..9ae3253b --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::bcftools=1.16" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf" + + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml similarity index 100% rename from modules/nf-core/modules/bcftools/sort/meta.yml rename to modules/nf-core/bcftools/sort/meta.yml diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..51e9c91c --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,54 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "bioconda::bcftools=1.16" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1': + 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml similarity index 63% rename from modules/nf-core/modules/bcftools/stats/meta.yml rename to modules/nf-core/bcftools/stats/meta.yml index 304b88ec..f7afcd50 100644 --- a/modules/nf-core/modules/bcftools/stats/meta.yml +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -23,6 +23,24 @@ input: type: file description: VCF input file pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
(VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' output: - meta: type: map @@ -40,3 +58,4 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@SusiJo" diff --git a/modules/nf-core/modules/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf similarity index 90% rename from modules/nf-core/modules/bedtools/getfasta/main.nf rename to modules/nf-core/bedtools/getfasta/main.nf index 4ce8c01e..57e7f0de 100644 --- a/modules/nf-core/modules/bedtools/getfasta/main.nf +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -1,8 +1,8 @@ process BEDTOOLS_GETFASTA { tag "$bed" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" diff --git a/modules/nf-core/modules/bedtools/getfasta/meta.yml b/modules/nf-core/bedtools/getfasta/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/getfasta/meta.yml rename to modules/nf-core/bedtools/getfasta/meta.yml diff --git a/modules/nf-core/modules/bedtools/maskfasta/main.nf b/modules/nf-core/bedtools/maskfasta/main.nf similarity index 90% rename from modules/nf-core/modules/bedtools/maskfasta/main.nf rename to modules/nf-core/bedtools/maskfasta/main.nf index 04ba116b..a84a23c1 100644 --- a/modules/nf-core/modules/bedtools/maskfasta/main.nf +++ b/modules/nf-core/bedtools/maskfasta/main.nf @@ -1,8 +1,8 @@ process BEDTOOLS_MASKFASTA { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" diff --git a/modules/nf-core/modules/bedtools/maskfasta/meta.yml b/modules/nf-core/bedtools/maskfasta/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/maskfasta/meta.yml rename to modules/nf-core/bedtools/maskfasta/meta.yml diff --git a/modules/nf-core/modules/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf similarity index 82% rename from modules/nf-core/modules/bedtools/merge/main.nf rename to modules/nf-core/bedtools/merge/main.nf index 6d1daa03..21b2e645 100644 --- a/modules/nf-core/modules/bedtools/merge/main.nf +++ b/modules/nf-core/bedtools/merge/main.nf @@ -1,8 +1,8 @@ process BEDTOOLS_MERGE { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + conda "bioconda::bedtools=2.30.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
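
The bedtools modules above also move from `process_medium` down to `process_single`, the nf-core label for quick single-CPU steps. A label carries no resources on its own; it is resolved by a `withLabel` selector in the pipeline's `conf/base.config`, along these lines (values illustrative only — the real numbers live in that file):

    process {
        withLabel: process_single {
            cpus   = 1
            memory = 6.GB
            time   = 4.h
        }
    }
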
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' : 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" @@ -20,6 +20,7 @@ process BEDTOOLS_MERGE { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ bedtools \\ merge \\ diff --git a/modules/nf-core/modules/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml similarity index 100% rename from modules/nf-core/modules/bedtools/merge/meta.yml rename to modules/nf-core/bedtools/merge/meta.yml diff --git a/modules/nf-core/modules/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf similarity index 75% rename from modules/nf-core/modules/blast/blastn/main.nf rename to modules/nf-core/blast/blastn/main.nf index b85f6c8e..5f35422a 100644 --- a/modules/nf-core/modules/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -2,10 +2,10 @@ process BLAST_BLASTN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null) + conda "bioconda::blast=2.13.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' : - 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' : + 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }" input: tuple val(meta), path(fasta) @@ -22,7 +22,7 @@ process BLAST_BLASTN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - DB=`find -L ./ -name "*.ndb" | sed 's/.ndb//'` + DB=`find -L ./ -name "*.ndb" | sed 's/\\.ndb\$//'` blastn \\ -num_threads $task.cpus \\ -db \$DB \\ diff --git a/modules/nf-core/modules/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml similarity index 100% rename from modules/nf-core/modules/blast/blastn/meta.yml rename to modules/nf-core/blast/blastn/meta.yml diff --git a/modules/nf-core/modules/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf similarity index 75% rename from modules/nf-core/modules/blast/makeblastdb/main.nf rename to modules/nf-core/blast/makeblastdb/main.nf index 12208ea8..62abd813 100644 --- a/modules/nf-core/modules/blast/makeblastdb/main.nf +++ b/modules/nf-core/blast/makeblastdb/main.nf @@ -2,10 +2,10 @@ process BLAST_MAKEBLASTDB { tag "$fasta" label 'process_medium' - conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null) + conda "bioconda::blast=2.13.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
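
The blastn hunk above also tightens how the database prefix is discovered. In the old `sed 's/.ndb//'` the unescaped dot matches any character and the pattern is unanchored, so it could mangle a path that merely contains an `ndb`-like substring; escaping the dot and anchoring to end-of-line removes only the literal extension. Inside the module's script block the fixed line reads:

    # strip only a trailing literal ".ndb" so \$DB holds the makeblastdb prefix
    DB=`find -L ./ -name "*.ndb" | sed 's/\\.ndb\$//'`
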
- 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' : - 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' : + 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }" input: path fasta diff --git a/modules/nf-core/modules/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml similarity index 100% rename from modules/nf-core/modules/blast/makeblastdb/meta.yml rename to modules/nf-core/blast/makeblastdb/meta.yml diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf similarity index 77% rename from modules/nf-core/modules/bowtie2/align/main.nf rename to modules/nf-core/bowtie2/align/main.nf index c74e376f..3d851866 100644 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ b/modules/nf-core/bowtie2/align/main.nf @@ -2,14 +2,14 @@ process BOWTIE2_ALIGN { tag "$meta.id" label "process_high" - conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) - container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? - "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : - "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" + conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' : + 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }" input: - tuple val(meta), path(reads) - path index + tuple val(meta) , path(reads) + tuple val(meta2), path(index) val save_unaligned val sort_bam @@ -40,8 +40,8 @@ process BOWTIE2_ALIGN { def samtools_command = sort_bam ? 'sort' : 'view' """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` - [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 bowtie2 \\ diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml similarity index 92% rename from modules/nf-core/modules/bowtie2/align/meta.yml rename to modules/nf-core/bowtie2/align/meta.yml index 42ba0f96..c8e9a001 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -27,6 +27,11 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf similarity index 80% rename from modules/nf-core/modules/bowtie2/build/main.nf rename to modules/nf-core/bowtie2/build/main.nf index a4da62d0..551893af 100644 --- a/modules/nf-core/modules/bowtie2/build/main.nf +++ b/modules/nf-core/bowtie2/build/main.nf @@ -2,17 +2,17 @@ process BOWTIE2_BUILD { tag "$fasta" label 'process_high' - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + conda "bioconda::bowtie2=2.4.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" input: - path fasta + tuple val(meta), path(fasta) output: - path 'bowtie2' , emit: index - path "versions.yml" , emit: versions + tuple val(meta), path('bowtie2') , emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml similarity index 74% rename from modules/nf-core/modules/bowtie2/build/meta.yml rename to modules/nf-core/bowtie2/build/meta.yml index 2da9a217..0240224d 100644 --- a/modules/nf-core/modules/bowtie2/build/meta.yml +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -16,10 +16,20 @@ tools: doi: 10.1038/nmeth.1923 licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: Input genome fasta file output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] - index: type: file description: Bowtie2 genome index files diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf similarity index 59% rename from modules/nf-core/modules/cat/fastq/main.nf rename to modules/nf-core/cat/fastq/main.nf index b6854895..8a0b5600 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/cat/fastq/main.nf @@ -1,8 +1,8 @@ process CAT_FASTQ { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'ubuntu:20.04' }" @@ -20,9 +20,9 @@ process CAT_FASTQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def readList = reads.collect{ it.toString() } + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] if (meta.single_end) { - if (readList.size > 1) { + if (readList.size >= 1) { """ cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz @@ -33,7 +33,7 @@ process CAT_FASTQ { """ } } else { - if (readList.size > 2) { + if (readList.size >= 2) { def read1 = [] def read2 = [] readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } @@ -48,4 +48,33 @@ process CAT_FASTQ { """ } } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()]
+    if (meta.single_end) {
+        if (readList.size > 1) {
+            """
+            touch ${prefix}.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    } else {
+        if (readList.size > 2) {
+            """
+            touch ${prefix}_1.merged.fastq.gz
+            touch ${prefix}_2.merged.fastq.gz
+
+            cat <<-END_VERSIONS > versions.yml
+            "${task.process}":
+                cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+            END_VERSIONS
+            """
+        }
+    }
 }
diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
similarity index 100%
rename from modules/nf-core/modules/cat/fastq/meta.yml
rename to modules/nf-core/cat/fastq/meta.yml
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
similarity index 79%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
rename to modules/nf-core/custom/dumpsoftwareversions/main.nf
index 12293efc..800a6099 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -1,11 +1,11 @@
 process CUSTOM_DUMPSOFTWAREVERSIONS {
-    label 'process_low'
+    label 'process_single'
 
     // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
-    conda (params.enable_conda ? "bioconda::multiqc=1.12" : null)
+    conda "bioconda::multiqc=1.14"
     container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
-        'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+        'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
+        'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }"
 
     input:
     path versions
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
similarity index 100%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml
rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 00000000..da033408
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+    """Generate a tabular HTML output of all versions for MultiQC."""
+    html = [
+        dedent(
+            """\\
+            <style>
+            #nf-core-versions tbody:nth-child(even) {
+                background-color: #f2f2f2;
+            }
+            </style>
+            <table class="table" style="width:100%" id="nf-core-versions">
+                <thead>
+                    <tr>
+                        <th> Process Name </th>
+                        <th> Software </th>
+                        <th> Version  </th>
+                    </tr>
+                </thead>
+            """
+        )
+    ]
+    for process, tmp_versions in sorted(versions.items()):
+        html.append("<tbody>")
+        for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+            html.append(
+                dedent(
+                    f"""\\
+                    <tr>
+                        <td><samp>{process if (i == 0) else ''}</samp></td>
+                        <td><samp>{tool}</samp></td>
+                        <td><samp>{version}</samp></td>
+                    </tr>
+                    """
+                )
+            )
+        html.append("</tbody>")
+    html.append("</table>
") + return "\\n".join(html) + + +def main(): + """Load all version files and generate merged output.""" + versions_this_module = {} + versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, + } + + with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + + # aggregate versions by the module name (derived from fully-qualified process name) + versions_by_module = {} + for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + + versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", + } + + versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), + } + + with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) + with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + + with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) + + +if __name__ == "__main__": + main() diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 00000000..580f87fe --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda "bioconda::samtools=1.16.1" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml similarity index 62% rename from modules/nf-core/modules/custom/getchromsizes/meta.yml rename to modules/nf-core/custom/getchromsizes/meta.yml index ee6c2571..219ca1d8 100644 --- a/modules/nf-core/modules/custom/getchromsizes/meta.yml +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -14,12 +14,22 @@ tools: licence: ["MIT"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - fasta: type: file description: FASTA file - pattern: "*.{fasta}" + pattern: "*.{fa,fasta,fna,fas}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - sizes: type: file description: File containing chromosome lengths @@ -28,11 +38,16 @@ output: type: file description: FASTA index file pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: type: file - description: File containing software version + description: File containing software versions pattern: "versions.yml" authors: - "@tamara-hodgetts" - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/fastp/main.nf similarity index 67% rename from modules/nf-core/modules/fastp/main.nf rename to modules/nf-core/fastp/main.nf index 120392c5..5eeb9b09 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -2,13 +2,14 @@ process FASTP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::fastp=0.23.2' : null) + conda "bioconda::fastp=0.23.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/fastp:0.23.2--h79da9fb_0' : 'quay.io/biocontainers/fastp:0.23.2--h79da9fb_0' }" input: tuple val(meta), path(reads) + path adapter_fasta val save_trimmed_fail val save_merged @@ -26,28 +27,53 @@ process FASTP { script: def args = task.ext.args ?: '' - // Added soft-links to original fastqs for consistent naming in MultiQC def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? 
"--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> ${prefix}.fastp.log \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { """ [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.fastp.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $args \\ 2> ${prefix}.fastp.log + cat <<-END_VERSIONS > versions.yml "${task.process}": fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") END_VERSIONS """ } else { - def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz @@ -59,6 +85,7 @@ process FASTP { --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ + $adapter_list \\ $fail_fastq \\ $merge_fastq \\ --thread $task.cpus \\ diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/fastp/meta.yml similarity index 77% rename from modules/nf-core/modules/fastp/meta.yml rename to modules/nf-core/fastp/meta.yml index 2bd2b1a9..197ea7ca 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -9,19 +9,24 @@ tools: description: | A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. documentation: https://github.com/OpenGene/fastp - doi: https://doi.org/10.1093/bioinformatics/bty560 + doi: 10.1093/bioinformatics/bty560 licence: ["MIT"] input: - meta: type: map description: | - Groovy Map containing sample information + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. e.g. [ id:'test', single_end:false ] - reads: type: file description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. 
+ pattern: "*.{fasta,fna,fas,fa}" - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf new file mode 100644 index 00000000..9ae58381 --- /dev/null +++ b/modules/nf-core/fastqc/main.nf @@ -0,0 +1,51 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::fastqc=0.11.9" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'quay.io/biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc $args --threads $task.cpus $renamed_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml similarity index 100% rename from modules/nf-core/modules/fastqc/meta.yml rename to modules/nf-core/fastqc/meta.yml diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/gunzip/main.nf similarity index 92% rename from modules/nf-core/modules/gunzip/main.nf rename to modules/nf-core/gunzip/main.nf index 70367049..d906034c 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -1,8 +1,8 @@ process GUNZIP { tag "$archive" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + conda "conda-forge::sed=4.7" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : 'ubuntu:20.04' }" diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..2e0e4054 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,34 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/ivar/consensus/main.nf b/modules/nf-core/ivar/consensus/main.nf similarity index 85% rename from modules/nf-core/modules/ivar/consensus/main.nf rename to modules/nf-core/ivar/consensus/main.nf index db6301e9..827463dd 100644 --- a/modules/nf-core/modules/ivar/consensus/main.nf +++ b/modules/nf-core/ivar/consensus/main.nf @@ -2,10 +2,10 @@ process IVAR_CONSENSUS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::ivar=1.3.1" : null) + conda "bioconda::ivar=1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ivar:1.3.1--h089eab3_0' : - 'quay.io/biocontainers/ivar:1.3.1--h089eab3_0' }" + 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'quay.io/biocontainers/ivar:1.4--h6b7c446_1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/ivar/consensus/meta.yml b/modules/nf-core/ivar/consensus/meta.yml similarity index 100% rename from modules/nf-core/modules/ivar/consensus/meta.yml rename to modules/nf-core/ivar/consensus/meta.yml diff --git a/modules/nf-core/modules/ivar/trim/main.nf b/modules/nf-core/ivar/trim/main.nf similarity index 80% rename from modules/nf-core/modules/ivar/trim/main.nf rename to modules/nf-core/ivar/trim/main.nf index 819aa5dd..a7dc404d 100644 --- a/modules/nf-core/modules/ivar/trim/main.nf +++ b/modules/nf-core/ivar/trim/main.nf @@ -2,10 +2,10 @@ process IVAR_TRIM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::ivar=1.3.1" : null) + conda "bioconda::ivar=1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ivar:1.3.1--h089eab3_0' : - 'quay.io/biocontainers/ivar:1.3.1--h089eab3_0' }" + 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'quay.io/biocontainers/ivar:1.4--h6b7c446_1' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/modules/ivar/trim/meta.yml b/modules/nf-core/ivar/trim/meta.yml similarity index 100% rename from modules/nf-core/modules/ivar/trim/meta.yml rename to modules/nf-core/ivar/trim/meta.yml diff --git a/modules/nf-core/modules/ivar/variants/main.nf b/modules/nf-core/ivar/variants/main.nf similarity index 85% rename from modules/nf-core/modules/ivar/variants/main.nf rename to modules/nf-core/ivar/variants/main.nf index b86042f0..ae647f2b 100644 --- a/modules/nf-core/modules/ivar/variants/main.nf +++ b/modules/nf-core/ivar/variants/main.nf @@ -2,10 +2,10 @@ process IVAR_VARIANTS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::ivar=1.3.1" : null) + conda "bioconda::ivar=1.4" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ivar:1.3.1--h089eab3_0' : - 'quay.io/biocontainers/ivar:1.3.1--h089eab3_0' }" + 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'quay.io/biocontainers/ivar:1.4--h6b7c446_1' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/ivar/variants/meta.yml b/modules/nf-core/ivar/variants/meta.yml similarity index 100% rename from modules/nf-core/modules/ivar/variants/meta.yml rename to modules/nf-core/ivar/variants/meta.yml diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/kraken2/kraken2/main.nf similarity index 62% rename from modules/nf-core/modules/kraken2/kraken2/main.nf rename to modules/nf-core/kraken2/kraken2/main.nf index d4000233..5901064e 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/kraken2/kraken2/main.nf @@ -2,7 +2,7 @@ process KRAKEN2_KRAKEN2 { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null) + conda "bioconda::kraken2=2.1.2 conda-forge::pigz=2.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' : 'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }" @@ -14,11 +14,11 @@ process KRAKEN2_KRAKEN2 { val save_reads_assignment output: - tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq - tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq - tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment - tuple val(meta), path('*report.txt') , emit: report - path "versions.yml" , emit: versions + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads.txt') , optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,9 +29,9 @@ process KRAKEN2_KRAKEN2 { def paired = meta.single_end ? "" : "--paired" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" - def classified_command = save_output_fastqs ? "--classified-out ${classified}" : "" - def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" - def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" """ @@ -40,9 +40,9 @@ process KRAKEN2_KRAKEN2 { --threads $task.cpus \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ - $unclassified_command \\ - $classified_command \\ - $readclassification_command \\ + $unclassified_option \\ + $classified_option \\ + $readclassification_option \\ $paired \\ $args \\ $reads diff --git a/modules/nf-core/modules/kraken2/kraken2/meta.yml b/modules/nf-core/kraken2/kraken2/meta.yml similarity index 100% rename from modules/nf-core/modules/kraken2/kraken2/meta.yml rename to modules/nf-core/kraken2/kraken2/meta.yml diff --git a/modules/nf-core/modules/minia/main.nf b/modules/nf-core/minia/main.nf similarity index 94% rename from modules/nf-core/modules/minia/main.nf rename to modules/nf-core/minia/main.nf index 87dd74f9..fe242920 100644 --- a/modules/nf-core/modules/minia/main.nf +++ b/modules/nf-core/minia/main.nf @@ -2,7 +2,7 @@ process MINIA { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::minia=3.2.6" : null) + conda "bioconda::minia=3.2.6" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/minia:3.2.6--h9a82719_0' : 'quay.io/biocontainers/minia:3.2.6--h9a82719_0' }" diff --git a/modules/nf-core/modules/minia/meta.yml b/modules/nf-core/minia/meta.yml similarity index 100% rename from modules/nf-core/modules/minia/meta.yml rename to modules/nf-core/minia/meta.yml diff --git a/modules/nf-core/modules/bandage/image/meta.yml b/modules/nf-core/modules/bandage/image/meta.yml deleted file mode 100644 index 1e824c4f..00000000 --- a/modules/nf-core/modules/bandage/image/meta.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: bandage_image -description: Render an assembly graph in GFA 1.0 format to PNG and SVG image formats -keywords: - - gfa - - graph - - assembly - - visualisation -tools: - - bandage: - description: | - Bandage - a Bioinformatics Application for Navigating De novo Assembly Graphs Easily - homepage: https://github.com/rrwick/Bandage - documentation: https://github.com/rrwick/Bandage - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - gfa: - type: file - description: Assembly graph in GFA 1.0 format - pattern: "*.gfa" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - png: - type: file - description: Bandage image in PNG format - pattern: "*.png" - - svg: - type: file - description: Bandage image in SVG format - pattern: "*.svg" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@heuermh" diff --git a/modules/nf-core/modules/bcftools/filter/main.nf b/modules/nf-core/modules/bcftools/filter/main.nf deleted file mode 100644 index ef99eda2..00000000 --- a/modules/nf-core/modules/bcftools/filter/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process BCFTOOLS_FILTER { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools filter \\ - --output ${prefix}.vcf.gz \\ - $args \\ - $vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf deleted file mode 100644 index 9552b57c..00000000 --- a/modules/nf-core/modules/bcftools/sort/main.nf +++ /dev/null @@ -1,35 +0,0 @@ -process BCFTOOLS_SORT { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*.gz"), emit: vcf - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools \\ - sort \\ - --output ${prefix}.vcf.gz \\ - $args \\ - $vcf - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/bcftools/stats/main.nf b/modules/nf-core/modules/bcftools/stats/main.nf deleted file mode 100644 index 1e0f3a47..00000000 --- a/modules/nf-core/modules/bcftools/stats/main.nf +++ /dev/null @@ -1,30 +0,0 @@ -process BCFTOOLS_STATS { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': - 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" - - input: - tuple val(meta), path(vcf) - - output: - tuple val(meta), path("*stats.txt"), emit: stats - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - bcftools stats $args $vcf > ${prefix}.bcftools_stats.txt - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100644 index d1390392..00000000 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
</table>
") - return "\\n".join(html) - - -versions_this_module = {} -versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, -} - -with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - -# aggregate versions by the module name (derived from fully-qualified process name) -versions_by_module = {} -for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - assert versions_by_module[module] == process_versions, ( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - -versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", -} - -versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), -} - -with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) -with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - -with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf deleted file mode 100644 index 0eabf3a4..00000000 --- a/modules/nf-core/modules/custom/getchromsizes/main.nf +++ /dev/null @@ -1,32 +0,0 @@ -process CUSTOM_GETCHROMSIZES { - tag "$fasta" - label 'process_low' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - - input: - path fasta - - output: - path '*.sizes' , emit: sizes - path '*.fai' , emit: fai - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - """ - samtools faidx $fasta - cut -f 1,2 ${fasta}.fai > ${fasta}.sizes - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf deleted file mode 100644 index 05730368..00000000 --- a/modules/nf-core/modules/fastqc/main.nf +++ /dev/null @@ -1,59 +0,0 @@ -process FASTQC { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : - 'quay.io/biocontainers/fastqc:0.11.9--0' }" - - input: - tuple val(meta), path(reads) - - output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - // Add soft-links to original FastQs for consistent naming in pipeline - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz - fastqc $args --threads $task.cpus ${prefix}.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } else { - """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz - fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ - } - - stub: - def prefix = task.ext.prefix ?: "${meta.id}" - """ - touch ${prefix}.html - touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/modules/gunzip/meta.yml deleted file mode 100644 index 4d2ebc84..00000000 --- a/modules/nf-core/modules/gunzip/meta.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: gunzip -description: Compresses and decompresses files. -keywords: - - gunzip - - compression -tools: - - gunzip: - description: | - gzip is a file format and a software application used for file compression and decompression. - documentation: https://www.gnu.org/software/gzip/manual/gzip.html - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" -output: - - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@jfy133" diff --git a/modules/nf-core/modules/tabix/bgzip/main.nf b/modules/nf-core/modules/tabix/bgzip/main.nf deleted file mode 100644 index 18e83c84..00000000 --- a/modules/nf-core/modules/tabix/bgzip/main.nf +++ /dev/null @@ -1,34 +0,0 @@ -process TABIX_BGZIP { - tag "$meta.id" - label 'process_low' - - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : - 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" - - input: - tuple val(meta), path(input) - - output: - tuple val(meta), path("${prefix}*"), emit: output - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - in_bgzip = input.toString().endsWith(".gz") - command1 = in_bgzip ? '-d' : '-c' - command2 = in_bgzip ? 
'' : " > ${prefix}.${input.getExtension()}.gz" - """ - bgzip $command1 $args -@${task.cpus} $input $command2 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf deleted file mode 100644 index 29ab10a5..00000000 --- a/modules/nf-core/modules/untar/main.nf +++ /dev/null @@ -1,53 +0,0 @@ -process UNTAR { - tag "$archive" - label 'process_low' - - conda (params.enable_conda ? "conda-forge::sed=4.7" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$untar"), emit: untar - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' - - """ - mkdir output - - tar \\ - -C output --strip-components 1 \\ - -xzvf \\ - $args \\ - $archive \\ - $args2 - - mv output ${untar} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - untar = archive.toString() - '.tar.gz' - """ - touch $untar - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf similarity index 95% rename from modules/nf-core/modules/mosdepth/main.nf rename to modules/nf-core/mosdepth/main.nf index d7e3c929..be4be831 100644 --- a/modules/nf-core/modules/mosdepth/main.nf +++ b/modules/nf-core/mosdepth/main.nf @@ -2,15 +2,15 @@ process MOSDEPTH { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::mosdepth=0.3.3' : null) + conda "bioconda::mosdepth=0.3.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.3--hdfd78af_1' : 'quay.io/biocontainers/mosdepth:0.3.3--hdfd78af_1'}" input: - tuple val(meta), path(bam), path(bai) - path bed - path fasta + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(bed) + tuple val(meta3), path(fasta) output: tuple val(meta), path('*.global.dist.txt') , emit: global_txt diff --git a/modules/nf-core/modules/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml similarity index 92% rename from modules/nf-core/modules/mosdepth/meta.yml rename to modules/nf-core/mosdepth/meta.yml index d1e33447..adf3893f 100644 --- a/modules/nf-core/modules/mosdepth/meta.yml +++ b/modules/nf-core/mosdepth/meta.yml @@ -26,10 +26,20 @@ input: type: file description: Index for BAM/CRAM file pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'test' ] - bed: type: file description: BED file with intersected intervals pattern: "*.{bed}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test' ] - fasta: type: file description: Reference genome FASTA file diff --git a/modules/nf-core/modules/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf similarity index 64% rename from modules/nf-core/modules/nanoplot/main.nf rename to modules/nf-core/nanoplot/main.nf index 83c0e2ec..ca0d8454 100644 --- a/modules/nf-core/modules/nanoplot/main.nf +++ b/modules/nf-core/nanoplot/main.nf @@ -2,20 +2,20 @@ process NANOPLOT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::nanoplot=1.40.0' : null) + conda "bioconda::nanoplot=1.41.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nanoplot:1.40.0--pyhdfd78af_0' : - 'quay.io/biocontainers/nanoplot:1.40.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/nanoplot:1.41.0--pyhdfd78af_0' : + 'quay.io/biocontainers/nanoplot:1.41.0--pyhdfd78af_0' }" input: tuple val(meta), path(ontfile) output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.png") , emit: png - tuple val(meta), path("*.txt") , emit: txt - tuple val(meta), path("*.log") , emit: log - path "versions.yml" , emit: versions + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.png") , optional: true, emit: png + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/modules/nanoplot/meta.yml b/modules/nf-core/nanoplot/meta.yml similarity index 100% rename from modules/nf-core/modules/nanoplot/meta.yml rename to modules/nf-core/nanoplot/meta.yml diff --git a/modules/nf-core/modules/nextclade/datasetget/main.nf b/modules/nf-core/nextclade/datasetget/main.nf similarity index 86% rename from modules/nf-core/modules/nextclade/datasetget/main.nf rename to modules/nf-core/nextclade/datasetget/main.nf index a9f52c84..ab2e20d3 100644 --- a/modules/nf-core/modules/nextclade/datasetget/main.nf +++ b/modules/nf-core/nextclade/datasetget/main.nf @@ -2,10 +2,10 @@ process NEXTCLADE_DATASETGET { tag "$dataset" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) + conda "bioconda::nextclade=2.12.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.12.0--h9ee0642_0' }" input: val dataset diff --git a/modules/nf-core/modules/nextclade/datasetget/meta.yml b/modules/nf-core/nextclade/datasetget/meta.yml similarity index 99% rename from modules/nf-core/modules/nextclade/datasetget/meta.yml rename to modules/nf-core/nextclade/datasetget/meta.yml index d5f65cda..16ee59f7 100644 --- a/modules/nf-core/modules/nextclade/datasetget/meta.yml +++ b/modules/nf-core/nextclade/datasetget/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://github.com/nextstrain/nextclade documentation: https://github.com/nextstrain/nextclade tool_dev_url: https://github.com/nextstrain/nextclade - doi: "" + licence: ["MIT"] input: diff --git a/modules/nf-core/modules/nextclade/run/main.nf b/modules/nf-core/nextclade/run/main.nf similarity index 91% rename from modules/nf-core/modules/nextclade/run/main.nf rename to modules/nf-core/nextclade/run/main.nf index 22f72781..960892b7 100644 --- a/modules/nf-core/modules/nextclade/run/main.nf +++ b/modules/nf-core/nextclade/run/main.nf @@ -2,10 +2,10 @@ process NEXTCLADE_RUN { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) + conda "bioconda::nextclade=2.12.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.12.0--h9ee0642_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/nextclade/run/meta.yml b/modules/nf-core/nextclade/run/meta.yml similarity index 99% rename from modules/nf-core/modules/nextclade/run/meta.yml rename to modules/nf-core/nextclade/run/meta.yml index d8f1eb37..3271fca4 100644 --- a/modules/nf-core/modules/nextclade/run/meta.yml +++ b/modules/nf-core/nextclade/run/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://github.com/nextstrain/nextclade documentation: https://github.com/nextstrain/nextclade tool_dev_url: https://github.com/nextstrain/nextclade - doi: "" + licence: ["MIT"] input: diff --git a/modules/nf-core/modules/pangolin/main.nf b/modules/nf-core/pangolin/main.nf similarity index 84% rename from modules/nf-core/modules/pangolin/main.nf rename to modules/nf-core/pangolin/main.nf index 6414b5d3..e2db37b2 100644 --- a/modules/nf-core/modules/pangolin/main.nf +++ b/modules/nf-core/pangolin/main.nf @@ -2,10 +2,10 @@ process PANGOLIN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::pangolin=4.1.1' : null) + conda "bioconda::pangolin=4.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/pangolin:4.1.1--pyhdfd78af_0' : - 'quay.io/biocontainers/pangolin:4.1.1--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pangolin:4.2--pyhdfd78af_1' : + 'quay.io/biocontainers/pangolin:4.2--pyhdfd78af_1' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/pangolin/meta.yml b/modules/nf-core/pangolin/meta.yml similarity index 100% rename from modules/nf-core/modules/pangolin/meta.yml rename to modules/nf-core/pangolin/meta.yml diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf similarity index 83% rename from modules/nf-core/modules/picard/collectmultiplemetrics/main.nf rename to modules/nf-core/picard/collectmultiplemetrics/main.nf index 63f4e872..ed88dbe7 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -1,16 +1,16 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" input: - tuple val(meta), path(bam) - path fasta - path fai + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*_metrics"), emit: metrics @@ -24,15 +24,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ CollectMultipleMetrics \\ $args \\ --INPUT $bam \\ diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml similarity index 75% rename from modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml rename to modules/nf-core/picard/collectmultiplemetrics/meta.yml index c11b02cf..22656080 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -23,11 +23,25 @@ input: e.g. [ id:'test', single_end:false ] - bam: type: file - description: BAM file - pattern: "*.{bam}" + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fasta: type: file description: Genome fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] - fai: type: file description: Index of FASTA file. Only needed when fasta is supplied. 
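The Picard hunks on either side of this point replace the old whole-gigabyte heap flag (`-Xmx${avail_mem}g`, default 3) with a megabyte-based one (`-Xmx${avail_mem}M`, default 3072), reserving roughly 20% of the task allocation for non-heap JVM and OS overhead via `(task.memory.mega*0.8).intValue()`. A minimal Groovy sketch of that heuristic, assuming a `picardHeapMegabytes` helper name and a `taskMemoryMega` stand-in for Nextflow's `task.memory.mega` (both illustrative, not part of the diff):

```groovy
// Illustrative sketch of the JVM heap sizing used by the updated Picard modules.
// `taskMemoryMega` stands in for Nextflow's `task.memory.mega`; pass null when
// the process has no memory directive (mirroring the log.info fallback above).
def picardHeapMegabytes(Long taskMemoryMega) {
    if (taskMemoryMega == null) {
        return 3072                        // no directive: default to 3 GB, in MB
    }
    (taskMemoryMega * 0.8).intValue()      // keep ~20% headroom for non-heap overhead
}

assert picardHeapMegabytes(null)  == 3072
assert picardHeapMegabytes(8192L) == 6553  // 8 GB task allocation -> ~6.4 GB heap
```

Sizing in megabytes avoids the old integer-gigabyte truncation, which rounded a 3.5 GB allocation down to a 3 GB heap and left nothing for JVM overhead.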
diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf similarity index 81% rename from modules/nf-core/modules/picard/markduplicates/main.nf rename to modules/nf-core/picard/markduplicates/main.nf index 4e559fea..1fe6ee2d 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -2,13 +2,15 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + conda "bioconda::picard=3.0.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' : + 'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }" input: tuple val(meta), path(bam) + path fasta + path fai output: tuple val(meta), path("*.bam") , emit: bam @@ -22,19 +24,20 @@ process PICARD_MARKDUPLICATES { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def avail_mem = 3 + def avail_mem = 3072 if (!task.memory) { log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' } else { - avail_mem = task.memory.giga + avail_mem = (task.memory.mega*0.8).intValue() } """ picard \\ - -Xmx${avail_mem}g \\ + -Xmx${avail_mem}M \\ MarkDuplicates \\ $args \\ --INPUT $bam \\ --OUTPUT ${prefix}.bam \\ + --REFERENCE_SEQUENCE $fasta \\ --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml similarity index 85% rename from modules/nf-core/modules/picard/markduplicates/meta.yml rename to modules/nf-core/picard/markduplicates/meta.yml index 842817bc..3f2357bb 100644 --- a/modules/nf-core/modules/picard/markduplicates/meta.yml +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -24,7 +24,15 @@ input: - bam: type: file description: BAM file - pattern: "*.{bam}" + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" output: - meta: type: map diff --git a/modules/nf-core/modules/plasmidid/main.nf b/modules/nf-core/plasmidid/main.nf similarity index 95% rename from modules/nf-core/modules/plasmidid/main.nf rename to modules/nf-core/plasmidid/main.nf index 7911d806..ff7165b1 100644 --- a/modules/nf-core/modules/plasmidid/main.nf +++ b/modules/nf-core/plasmidid/main.nf @@ -2,7 +2,7 @@ process PLASMIDID { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::plasmidid=1.6.5' : null) + conda "bioconda::plasmidid=1.6.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/plasmidid:1.6.5--hdfd78af_0' : 'quay.io/biocontainers/plasmidid:1.6.5--hdfd78af_0' }" diff --git a/modules/nf-core/modules/plasmidid/meta.yml b/modules/nf-core/plasmidid/meta.yml similarity index 99% rename from modules/nf-core/modules/plasmidid/meta.yml rename to modules/nf-core/plasmidid/meta.yml index 4887cb9a..4e0169d8 100644 --- a/modules/nf-core/modules/plasmidid/meta.yml +++ b/modules/nf-core/plasmidid/meta.yml @@ -10,7 +10,7 @@ tools: homepage: https://github.com/BU-ISCIII/plasmidID/wiki documentation: https://github.com/BU-ISCIII/plasmidID#readme tool_dev_url: https://github.com/BU-ISCIII/plasmidID - doi: "" + licence: ["GPL v3"] input: diff --git a/modules/nf-core/modules/pycoqc/main.nf b/modules/nf-core/pycoqc/main.nf similarity index 67% rename from modules/nf-core/modules/pycoqc/main.nf rename to modules/nf-core/pycoqc/main.nf index a5e6fcb2..02092a4b 100644 --- a/modules/nf-core/modules/pycoqc/main.nf +++ b/modules/nf-core/pycoqc/main.nf @@ -2,30 +2,31 @@ process PYCOQC { tag "$summary" label 'process_medium' - conda (params.enable_conda ? "bioconda::pycoqc=2.5.2" : null) + conda "bioconda::pycoqc=2.5.2" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/pycoqc:2.5.2--py_0' : 'quay.io/biocontainers/pycoqc:2.5.2--py_0' }" input: - path summary + tuple val(meta), path(summary) output: - path "*.html" , emit: html - path "*.json" , emit: json - path "versions.yml" , emit: versions + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.json"), emit: json + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ pycoQC \\ $args \\ -f $summary \\ - -o pycoqc.html \\ - -j pycoqc.json + -o ${prefix}.html \\ + -j ${prefix}.json cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/pycoqc/meta.yml b/modules/nf-core/pycoqc/meta.yml similarity index 100% rename from modules/nf-core/modules/pycoqc/meta.yml rename to modules/nf-core/pycoqc/meta.yml diff --git a/modules/nf-core/modules/quast/main.nf b/modules/nf-core/quast/main.nf similarity index 94% rename from modules/nf-core/modules/quast/main.nf rename to modules/nf-core/quast/main.nf index 5585491b..a7f9a4c8 100644 --- a/modules/nf-core/modules/quast/main.nf +++ b/modules/nf-core/quast/main.nf @@ -1,7 +1,7 @@ process QUAST { label 'process_medium' - conda (params.enable_conda ? 'bioconda::quast=5.2.0' : null) + conda "bioconda::quast=5.2.0" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' : 'quay.io/biocontainers/quast:5.2.0--py39pl5321h2add14b_1' }" diff --git a/modules/nf-core/modules/quast/meta.yml b/modules/nf-core/quast/meta.yml similarity index 85% rename from modules/nf-core/modules/quast/meta.yml rename to modules/nf-core/quast/meta.yml index f4bc38fc..2189bb3e 100644 --- a/modules/nf-core/modules/quast/meta.yml +++ b/modules/nf-core/quast/meta.yml @@ -9,7 +9,7 @@ tools: description: | QUAST calculates quality metrics for genome assemblies homepage: http://bioinf.spbau.ru/quast - doi: https://doi.org/10.1093/bioinformatics/btt086 + doi: 10.1093/bioinformatics/btt086 licence: ["GPL-2.0-only"] input: - consensus: @@ -34,8 +34,11 @@ output: - quast: type: directory description: Directory containing complete quast report - pattern: "{prefix}.lineage_report.csv" + pattern: "{prefix}/" - report: + type: file + description: tab-separated version of the summary, suitable for spreadsheets and mqc + pattern: "report.tsv" - versions: type: file diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf similarity index 79% rename from modules/nf-core/modules/samtools/flagstat/main.nf rename to modules/nf-core/samtools/flagstat/main.nf index 03ec2dcf..2120cd7d 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(bam), path(bai) @@ -23,7 +23,7 @@ process SAMTOOLS_FLAGSTAT { """ samtools \\ flagstat \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ $bam \\ > ${prefix}.flagstat diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/flagstat/meta.yml rename to modules/nf-core/samtools/flagstat/meta.yml index 95269063..954225df 100644 --- a/modules/nf-core/modules/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -14,7 +14,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf similarity index 81% rename from modules/nf-core/modules/samtools/idxstats/main.nf rename to modules/nf-core/samtools/idxstats/main.nf index 4b245419..a7b87d8b 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -1,11 +1,11 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? 
"bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(bam), path(bai) @@ -24,6 +24,7 @@ process SAMTOOLS_IDXSTATS { """ samtools \\ idxstats \\ + --threads ${task.cpus-1} \\ $bam \\ > ${prefix}.idxstats diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/idxstats/meta.yml rename to modules/nf-core/samtools/idxstats/meta.yml index 3710ab88..dda87e1e 100644 --- a/modules/nf-core/modules/samtools/idxstats/meta.yml +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -15,7 +15,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf similarity index 88% rename from modules/nf-core/modules/samtools/index/main.nf rename to modules/nf-core/samtools/index/main.nf index e04e63e8..8b95687a 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(input) diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml similarity index 95% rename from modules/nf-core/modules/samtools/index/meta.yml rename to modules/nf-core/samtools/index/meta.yml index e5cadbc2..8bd2fa6f 100644 --- a/modules/nf-core/modules/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. 
homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf similarity index 86% rename from modules/nf-core/modules/samtools/sort/main.nf rename to modules/nf-core/samtools/sort/main.nf index b4fc1cbe..84c167cd 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/samtools/sort/main.nf @@ -2,16 +2,17 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(bam) output: tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path("*.csi"), emit: csi, optional: true path "versions.yml" , emit: versions when: diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml similarity index 88% rename from modules/nf-core/modules/samtools/sort/meta.yml rename to modules/nf-core/samtools/sort/meta.yml index a820c55a..07328431 100644 --- a/modules/nf-core/modules/samtools/sort/meta.yml +++ b/modules/nf-core/samtools/sort/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -39,6 +39,10 @@ output: type: file description: File containing software versions pattern: "versions.yml" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf similarity index 80% rename from modules/nf-core/modules/samtools/stats/main.nf rename to modules/nf-core/samtools/stats/main.nf index c913bc5e..0a2a3640 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/samtools/stats/main.nf @@ -1,14 +1,14 @@ process SAMTOOLS_STATS { tag "$meta.id" - label 'process_low' + label 'process_single' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: - tuple val(meta), path(bam), path(bai) + tuple val(meta), path(input), path(input_index) path fasta output: @@ -25,9 +25,9 @@ process SAMTOOLS_STATS { """ samtools \\ stats \\ - --threads ${task.cpus-1} \\ + --threads ${task.cpus} \\ ${reference} \\ - ${bam} \\ + ${input} \\ > ${prefix}.stats cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml similarity index 83% rename from modules/nf-core/modules/samtools/stats/meta.yml rename to modules/nf-core/samtools/stats/meta.yml index cac50b1c..1d68a5d8 100644 --- a/modules/nf-core/modules/samtools/stats/meta.yml +++ b/modules/nf-core/samtools/stats/meta.yml @@ -13,7 +13,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -23,13 +23,13 @@ input: Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - input: - type: file - description: BAM/CRAM file from alignment - pattern: "*.{bam,cram}" + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" - input_index: - type: file - description: BAI/CRAI file from alignment - pattern: "*.{bai,crai}" + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" - fasta: type: optional file description: Reference file the CRAM was created with diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf similarity index 54% rename from modules/nf-core/modules/samtools/view/main.nf rename to modules/nf-core/samtools/view/main.nf index 55194e88..729c85e5 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -1,20 +1,25 @@ process SAMTOOLS_VIEW { tag "$meta.id" - label 'process_medium' + label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + conda "bioconda::samtools=1.16.1" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'quay.io/biocontainers/samtools:1.16.1--h6899075_1' }" input: tuple val(meta), path(input), path(index) path fasta + path qname output: - tuple val(meta), path("*.bam") , emit: bam , optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -23,18 +28,23 @@ process SAMTOOLS_VIEW { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def reference = fasta ? "--reference ${fasta} -C" : "" - def file_type = input.getExtension() + def reference = fasta ? "--reference ${fasta}" : "" + def readnames = qname ? "--qname-file ${qname}": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ view \\ --threads ${task.cpus-1} \\ ${reference} \\ + ${readnames} \\ $args \\ + -o ${prefix}.${file_type} \\ $input \\ - $args2 \\ - > ${prefix}.${file_type} + $args2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml similarity index 62% rename from modules/nf-core/modules/samtools/view/meta.yml rename to modules/nf-core/samtools/view/meta.yml index a8b43ecc..2e597d34 100644 --- a/modules/nf-core/modules/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -12,7 +12,7 @@ tools: short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. These files are generated as output by short read aligners like BWA. homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html + documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] input: @@ -33,6 +33,10 @@ input: type: optional file description: Reference file the CRAM was created with pattern: "*.{fasta,fa}" + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" output: - meta: type: map @@ -41,12 +45,29 @@ output: e.g. 
[ id:'test', single_end:false ] - bam: type: file - description: filtered/converted BAM/SAM file - pattern: "*.{bam,sam}" + description: optional filtered/converted BAM file + pattern: "*.{bam}" - cram: type: file - description: filtered/converted CRAM file - pattern: "*.cram" + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + # bai, csi, and crai are created with `--write-index` + - bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" - versions: type: file description: File containing software versions @@ -55,3 +76,4 @@ authors: - "@drpatelh" - "@joseespinosa" - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/modules/spades/main.nf b/modules/nf-core/spades/main.nf similarity index 90% rename from modules/nf-core/modules/spades/main.nf rename to modules/nf-core/spades/main.nf index a467fcd7..2c58069c 100644 --- a/modules/nf-core/modules/spades/main.nf +++ b/modules/nf-core/spades/main.nf @@ -2,14 +2,15 @@ process SPADES { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::spades=3.15.4' : null) + conda "bioconda::spades=3.15.5" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/spades:3.15.4--h95f258a_0' : - 'quay.io/biocontainers/spades:3.15.4--h95f258a_0' }" + 'https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1' : + 'quay.io/biocontainers/spades:3.15.5--h95f258a_1' }" input: tuple val(meta), path(illumina), path(pacbio), path(nanopore) - path hmm + path yml + path hmm output: tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds @@ -31,15 +32,14 @@ process SPADES { def pacbio_reads = pacbio ? "--pacbio $pacbio" : "" def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" """ spades.py \\ $args \\ --threads $task.cpus \\ --memory $maxmem \\ $custom_hmms \\ - $illumina_reads \\ - $pacbio_reads \\ - $nanopore_reads \\ + $reads \\ -o ./ mv spades.log ${prefix}.spades.log diff --git a/modules/nf-core/modules/spades/meta.yml b/modules/nf-core/spades/meta.yml similarity index 86% rename from modules/nf-core/modules/spades/meta.yml rename to modules/nf-core/spades/meta.yml index e49cd139..701de1a7 100644 --- a/modules/nf-core/modules/spades/meta.yml +++ b/modules/nf-core/spades/meta.yml @@ -34,6 +34,13 @@ input: type: file description: | List of input FastQ files of size 1, originating from Oxford Nanopore technology. + - yml: + type: file + description: | + Path to yml file containing read information. + The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML. + File entries in this yml must contain only the file name and no paths. + pattern: "*.{yml,yaml}" - hmm: type: file description: File or directory with amino acid HMMs for Spades HMM-guided mode. 
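The `yml` input added above changes how reads reach SPAdes: when a YAML dataset file is supplied it takes precedence, and the per-technology read flags are dropped in favour of a single `--dataset` argument. A minimal sketch of a call site follows (the `ch_reads` and `reads.yml` names are illustrative, not taken from the pipeline):

```groovy
// Hypothetical wiring: hand SPAdes a YAML dataset instead of read flags.
// The FASTQ files named inside reads.yml must still be passed through the
// reads tuple so that Nextflow stages them into the task work directory.
ch_yml = Channel.value(file('reads.yml'))

SPADES (
    ch_reads, // tuple val(meta), path(illumina), path(pacbio), path(nanopore)
    ch_yml,   // non-empty, so the script builds "--dataset reads.yml"
    []        // no custom HMMs for HMM-guided mode
)
```

Passing `[]` for `yml`, as the updated `ASSEMBLY_SPADES` subworkflow does, falls back to the previous `$illumina_reads $pacbio_reads $nanopore_reads` behaviour.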
diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..6dd4e202 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,54 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "bioconda::tabix=1.11" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : + 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + touch ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml similarity index 89% rename from modules/nf-core/modules/tabix/bgzip/meta.yml rename to modules/nf-core/tabix/bgzip/meta.yml index 50070175..c3ea2107 100644 --- a/modules/nf-core/modules/tabix/bgzip/meta.yml +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -32,6 +32,10 @@ output: type: file description: Output compressed/decompressed file pattern: "*." + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" - versions: type: file description: File containing software versions @@ -40,3 +44,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/modules/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf similarity index 92% rename from modules/nf-core/modules/tabix/tabix/main.nf rename to modules/nf-core/tabix/tabix/main.nf index e155e468..9a404db9 100644 --- a/modules/nf-core/modules/tabix/tabix/main.nf +++ b/modules/nf-core/tabix/tabix/main.nf @@ -1,8 +1,8 @@ process TABIX_TABIX { tag "$meta.id" - label 'process_medium' + label 'process_single' - conda (params.enable_conda ? 'bioconda::tabix=1.11' : null) + conda "bioconda::tabix=1.11" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
'https://depot.galaxyproject.org/singularity/tabix:1.11--hdfd78af_0' : 'quay.io/biocontainers/tabix:1.11--hdfd78af_0' }" diff --git a/modules/nf-core/modules/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml similarity index 100% rename from modules/nf-core/modules/tabix/tabix/meta.yml rename to modules/nf-core/tabix/tabix/meta.yml diff --git a/modules/nf-core/modules/unicycler/main.nf b/modules/nf-core/unicycler/main.nf similarity index 95% rename from modules/nf-core/modules/unicycler/main.nf rename to modules/nf-core/unicycler/main.nf index 4d3721b0..8981c433 100644 --- a/modules/nf-core/modules/unicycler/main.nf +++ b/modules/nf-core/unicycler/main.nf @@ -2,7 +2,7 @@ process UNICYCLER { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::unicycler=0.4.8' : null) + conda "bioconda::unicycler=0.4.8" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/unicycler:0.4.8--py38h8162308_3' : 'quay.io/biocontainers/unicycler:0.4.8--py38h8162308_3' }" diff --git a/modules/nf-core/modules/unicycler/meta.yml b/modules/nf-core/unicycler/meta.yml similarity index 100% rename from modules/nf-core/modules/unicycler/meta.yml rename to modules/nf-core/unicycler/meta.yml diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..3384847a --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "conda-forge::sed=4.7 bioconda::grep=3.4 conda-forge::tar=1.34" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/untar/meta.yml similarity index 85% rename from modules/nf-core/modules/untar/meta.yml rename to modules/nf-core/untar/meta.yml index d426919b..ea7a3f38 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/untar/meta.yml @@ -26,9 +26,9 @@ output: Groovy Map containing sample information e.g. 
[ id:'test', single_end:false ] - untar: - type: file - description: - pattern: "*.*" + type: directory + description: Directory containing contents of archive + pattern: "*/" - versions: type: file description: File containing software versions @@ -36,3 +36,5 @@ output: authors: - "@joseespinosa" - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/modules/vcflib/vcfuniq/main.nf b/modules/nf-core/vcflib/vcfuniq/main.nf similarity index 94% rename from modules/nf-core/modules/vcflib/vcfuniq/main.nf rename to modules/nf-core/vcflib/vcfuniq/main.nf index 707f074b..6b09dc4d 100644 --- a/modules/nf-core/modules/vcflib/vcfuniq/main.nf +++ b/modules/nf-core/vcflib/vcfuniq/main.nf @@ -3,7 +3,7 @@ process VCFLIB_VCFUNIQ { label 'process_low' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::vcflib=1.0.3" : null) + conda "bioconda::vcflib=1.0.3" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': 'quay.io/biocontainers/vcflib:1.0.3--hecb563c_1' }" diff --git a/modules/nf-core/modules/vcflib/vcfuniq/meta.yml b/modules/nf-core/vcflib/vcfuniq/meta.yml similarity index 95% rename from modules/nf-core/modules/vcflib/vcfuniq/meta.yml rename to modules/nf-core/vcflib/vcfuniq/meta.yml index 50a3617a..8b5e9e89 100644 --- a/modules/nf-core/modules/vcflib/vcfuniq/meta.yml +++ b/modules/nf-core/vcflib/vcfuniq/meta.yml @@ -9,7 +9,7 @@ tools: description: Command-line tools for manipulating VCF files homepage: https://github.com/vcflib/vcflib documentation: https://github.com/vcflib/vcflib#USAGE - doi: "https://doi.org/10.1101/2021.05.21.445151" + doi: "10.1101/2021.05.21.445151" licence: ["MIT"] input: diff --git a/nextflow.config b/nextflow.config index 45c356a8..8641580f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -51,7 +51,7 @@ params { skip_multiqc = false // Illumina QC, read trimming and filtering options - kraken2_db = 's3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz' + kraken2_db = 's3://ngi-igenomes/test-data/viralrecon/kraken2_human.tar.gz' kraken2_db_name = 'human' kraken2_variants_host_filter = false kraken2_assembly_host_filter = true @@ -85,23 +85,24 @@ params { skip_bandage = false skip_blast = false skip_abacas = false - skip_plasmidid = false + skip_plasmidid = true skip_assembly_quast = false skip_assembly = false // Boilerplate options outdir = null - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false monochrome_logs = false + hook_url = null help = false + version = false validate_params = true show_hidden_params = false - schema_ignore_params = 'genomes,igenomes_base' - enable_conda = false + schema_ignore_params = 'genomes' + // Config options custom_config_version = 'master' @@ -111,6 +112,7 @@ params { config_profile_url = null config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten max_memory = '128.GB' @@ -140,12 +142,25 @@ try { profiles { debug { process.beforeScript = 'echo $HOSTNAME' } conda { - params.enable_conda = true + conda.enabled = true docker.enabled = false singularity.enabled = false podman.enabled = false shifter.enabled = false charliecloud.enabled = false + // Increase time available to build Conda environment + conda.createTimeout = "120 min" + } + mamba { + 
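+        // Mirrors the conda profile above; the only difference is that
+        // dependency resolution is delegated to mamba via conda.useMamba.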
conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + // Increase time available to build Conda environment + conda.createTimeout = "120 min" } docker { docker.enabled = true @@ -155,6 +170,9 @@ profiles { shifter.enabled = false charliecloud.enabled = false } + arm { + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + } singularity { singularity.enabled = true singularity.autoMounts = true @@ -184,6 +202,11 @@ profiles { podman.enabled = false shifter.enabled = false } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } test { includeConfig 'conf/test.config' } test_sispa { includeConfig 'conf/test_sispa.config' } test_nanopore { includeConfig 'conf/test_nanopore.config' } @@ -193,9 +216,6 @@ profiles { test_full_sispa { includeConfig 'conf/test_full_sispa.config' } } -// Increase time available to build Conda environment -conda { createTimeout = "120 min" } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -213,29 +233,30 @@ process.shell = ['/bin/bash', '-euo', 'pipefail'] def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" } report { enabled = true - file = "${params.tracedir}/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" } trace { enabled = true - file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" } dag { enabled = true - file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" } manifest { name = 'nf-core/viralrecon' - author = 'Sarai Varona and Sara Monzon' + author = """Patel H, Varona S and Monzon S""" homePage = 'https://github.com/nf-core/viralrecon' - description = 'Assembly and intrahost/low-frequency variant calling for viral samples' + description = """Assembly and intrahost/low-frequency variant calling for viral samples""" mainScript = 'main.nf' - nextflowVersion = '!>=21.10.3' - version = '2.5' + nextflowVersion = '!>=22.10.1' + version = '2.6.0' + doi = 'https://doi.org/10.5281/zenodo.3901628' } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index 1e8b7d38..ff58253a 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -320,7 +320,7 @@ "kraken2_db": { "type": "string", "format": "path", - "default": "s3://nf-core-awsmegatests/viralrecon/input_data/kraken2_human.tar.gz", + "default": "s3://ngi-igenomes/test-data/viralrecon/kraken2_human.tar.gz", "fa_icon": "fab fa-gitkraken", "description": "Full path to Kraken2 database built from host genome." 
}, @@ -519,7 +519,8 @@ "skip_plasmidid": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip assembly report generation by PlasmidID." + "description": "Skip assembly report generation by PlasmidID.", + "default": true }, "skip_assembly_quast": { "type": "boolean", @@ -547,6 +548,12 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, "publish_dir_mode": { "type": "string", "default": "copy", @@ -578,20 +585,13 @@ "hidden": true, "help_text": "Set to disable colourful command line output and live life in monochrome." }, - "tracedir": { + "hook_url": { "type": "string", - "format": "directory-path", - "description": "Directory to keep pipeline Nextflow logs and reports.", - "default": "${params.outdir}/pipeline_info", - "fa_icon": "fas fa-cogs", + "description": "Incoming hook URL for messaging service", + "fa_icon": "fas fa-people-group", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", "hidden": true }, - "enable_conda": { - "type": "boolean", - "hidden": true, - "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", - "fa_icon": "fas fa-bacon" - }, "validate_params": { "type": "boolean", "description": "Boolean whether to validate parameters against the schema at runtime", @@ -655,8 +655,7 @@ "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`.\n\n```bash\n## Download and use config file with following git commit id\n--custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96\n```" + "fa_icon": "fas fa-users-cog" }, "custom_config_base": { "type": "string", diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..0d62beb6 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. 
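+# Black is pinned to a 120-character line length below, and isort uses the
+# matching "black" profile so the two tools do not fight over import layout.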
+[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf index 43d34c6e..e32ac5cc 100644 --- a/subworkflows/local/assembly_minia.nf +++ b/subworkflows/local/assembly_minia.nf @@ -2,7 +2,7 @@ // Assembly and downstream processing for minia scaffolds // -include { MINIA } from '../../modules/nf-core/modules/minia/main' +include { MINIA } from '../../modules/nf-core/minia/main' include { ASSEMBLY_QC } from './assembly_qc' diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index 9c42c652..6ca9fc78 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -3,10 +3,10 @@ // include { FILTER_BLASTN } from '../../modules/local/filter_blastn' -include { ABACAS } from '../../modules/nf-core/modules/abacas/main' -include { BLAST_BLASTN } from '../../modules/nf-core/modules/blast/blastn/main' -include { PLASMIDID } from '../../modules/nf-core/modules/plasmidid/main' -include { QUAST } from '../../modules/nf-core/modules/quast/main' +include { ABACAS } from '../../modules/nf-core/abacas/main' +include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' +include { PLASMIDID } from '../../modules/nf-core/plasmidid/main' +include { QUAST } from '../../modules/nf-core/quast/main' workflow ASSEMBLY_QC { take: diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf index 46457c23..821ce927 100644 --- a/subworkflows/local/assembly_spades.nf +++ b/subworkflows/local/assembly_spades.nf @@ -2,10 +2,10 @@ // Assembly and downstream processing for SPAdes scaffolds // -include { SPADES } from '../../modules/nf-core/modules/spades/main' -include { BANDAGE_IMAGE } from '../../modules/nf-core/modules/bandage/image/main' -include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/modules/gunzip/main' +include { SPADES } from '../../modules/nf-core/spades/main' +include { BANDAGE_IMAGE } from '../../modules/nf-core/bandage/image/main' +include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/gunzip/main' include { ASSEMBLY_QC } from './assembly_qc' @@ -38,6 +38,7 @@ workflow ASSEMBLY_SPADES { // SPADES ( ch_reads, + [], hmm ) ch_versions = ch_versions.mix(SPADES.out.versions.first()) diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf index 88df5ef5..bb6a4d59 100644 --- a/subworkflows/local/assembly_unicycler.nf +++ b/subworkflows/local/assembly_unicycler.nf @@ -2,10 +2,10 @@ // Assembly and downstream processing for Unicycler scaffolds // -include { UNICYCLER } from '../../modules/nf-core/modules/unicycler/main' -include { BANDAGE_IMAGE } from '../../modules/nf-core/modules/bandage/image/main' -include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/modules/gunzip/main' +include { UNICYCLER } from '../../modules/nf-core/unicycler/main' +include { BANDAGE_IMAGE } from '../../modules/nf-core/bandage/image/main' +include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/gunzip/main' include { ASSEMBLY_QC } from 
'./assembly_qc' diff --git a/subworkflows/local/bam_trim_primers_ivar.nf b/subworkflows/local/bam_trim_primers_ivar.nf new file mode 100644 index 00000000..7a82ce30 --- /dev/null +++ b/subworkflows/local/bam_trim_primers_ivar.nf @@ -0,0 +1,48 @@ +// +// iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { IVAR_TRIM } from '../../modules/nf-core/ivar/trim/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' + +workflow BAM_TRIM_PRIMERS_IVAR { + take: + bam // channel: [ val(meta), [ bam ], [bai] ] + bed // path : bed + fasta // channel: reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // iVar trim primers + // + IVAR_TRIM ( + bam, + bed + ) + ch_versions = ch_versions.mix(IVAR_TRIM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + IVAR_TRIM.out.bam, + fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] + log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_bcftools.nf b/subworkflows/local/consensus_bcftools.nf index 3f82151e..1a3ae3a2 100644 --- a/subworkflows/local/consensus_bcftools.nf +++ b/subworkflows/local/consensus_bcftools.nf @@ -2,11 +2,11 @@ // Consensus calling with BCFTools and downstream processing QC // -include { BCFTOOLS_FILTER } from '../../modules/nf-core/modules/bcftools/filter/main' -include { TABIX_TABIX } from '../../modules/nf-core/modules/tabix/tabix/main' -include { BEDTOOLS_MERGE } from '../../modules/nf-core/modules/bedtools/merge/main' -include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/modules/bedtools/maskfasta/main' -include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/modules/bcftools/consensus/main' +include { BCFTOOLS_FILTER } from '../../modules/nf-core/bcftools/filter/main' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/bedtools/maskfasta/main' +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/bcftools/consensus/main' include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' include { RENAME_FASTA_HEADER } from '../../modules/local/rename_fasta_header' include { CONSENSUS_QC } from './consensus_qc' diff --git a/subworkflows/local/consensus_ivar.nf b/subworkflows/local/consensus_ivar.nf index 15fc326e..e96ffd44 100644 --- a/subworkflows/local/consensus_ivar.nf +++ b/subworkflows/local/consensus_ivar.nf @@ -2,7 +2,7 @@ // Consensus calling with iVar and downstream processing QC // -include { IVAR_CONSENSUS } from '../../modules/nf-core/modules/ivar/consensus/main' +include { IVAR_CONSENSUS } from '../../modules/nf-core/ivar/consensus/main' include { CONSENSUS_QC } from './consensus_qc' workflow 
CONSENSUS_IVAR { diff --git a/subworkflows/local/consensus_qc.nf b/subworkflows/local/consensus_qc.nf index 8b24e60e..3e686f6f 100644 --- a/subworkflows/local/consensus_qc.nf +++ b/subworkflows/local/consensus_qc.nf @@ -2,9 +2,9 @@ // Consensus calling QC // -include { QUAST } from '../../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../../modules/nf-core/modules/nextclade/run/main' +include { QUAST } from '../../modules/nf-core/quast/main' +include { PANGOLIN } from '../../modules/nf-core/pangolin/main' +include { NEXTCLADE_RUN } from '../../modules/nf-core/nextclade/run/main' include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' workflow CONSENSUS_QC { diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/local/fastq_trim_fastp_fastqc.nf similarity index 90% rename from subworkflows/nf-core/fastqc_fastp.nf rename to subworkflows/local/fastq_trim_fastp_fastqc.nf index 13ba31eb..101741c8 100644 --- a/subworkflows/nf-core/fastqc_fastp.nf +++ b/subworkflows/local/fastq_trim_fastp_fastqc.nf @@ -2,9 +2,9 @@ // Read QC and trimming // -include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/modules/fastqc/main' -include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/modules/fastqc/main' -include { FASTP } from '../../modules/nf-core/modules/fastp/main' +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' // // Function that parses fastp json output file to get total number of reads after trimming @@ -16,9 +16,10 @@ def getFastpReadsAfterFiltering(json_file) { return json['after_filtering']['total_reads'].toInteger() } -workflow FASTQC_FASTP { +workflow FASTQ_TRIM_FASTP_FASTQC { take: reads // channel: [ val(meta), [ reads ] ] + adapter_fasta // file: adapter.fasta save_trimmed_fail // value: boolean save_merged // value: boolean @@ -48,6 +49,7 @@ workflow FASTQC_FASTP { if (!params.skip_fastp) { FASTP ( reads, + adapter_fasta, save_trimmed_fail, save_merged ) diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/local/filter_bam_samtools.nf similarity index 83% rename from subworkflows/nf-core/filter_bam_samtools.nf rename to subworkflows/local/filter_bam_samtools.nf index 050bf085..2d47a29a 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/local/filter_bam_samtools.nf @@ -2,9 +2,9 @@ // Filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_VIEW } from '../../modules/nf-core/modules/samtools/view/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' workflow FILTER_BAM_SAMTOOLS { take: @@ -20,7 +20,8 @@ workflow FILTER_BAM_SAMTOOLS { // SAMTOOLS_VIEW ( bam_bai, - fasta + fasta, + [] ) ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) @@ -33,7 +34,8 @@ workflow FILTER_BAM_SAMTOOLS { ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) BAM_STATS_SAMTOOLS ( - SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]) + 
SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), + fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index 70d1e53e..4b7c1dda 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -2,19 +2,19 @@ // Uncompress and prepare reference genome files // -include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/modules/gunzip/main' -include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/modules/untar/main' -include { UNTAR as UNTAR_NEXTCLADE_DB } from '../../modules/nf-core/modules/untar/main' -include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/modules/untar/main' -include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/modules/untar/main' -include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' -include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/modules/blast/makeblastdb/main' -include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/modules/bedtools/getfasta/main' -include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main' -include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/modules/nextclade/datasetget/main' +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/gunzip/main' +include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_NEXTCLADE_DB } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/untar/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/blast/makeblastdb/main' +include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/bedtools/getfasta/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/nextclade/datasetget/main' include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' @@ -34,12 +34,13 @@ workflow PREPARE_GENOME { ch_fasta = GUNZIP_FASTA.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_fasta = file(params.fasta) + ch_fasta = Channel.value(file(params.fasta)) } // // Uncompress GFF annotation file // + ch_gff = Channel.empty() if (params.gff) { if (params.gff.endsWith('.gz')) { GUNZIP_GFF ( @@ -48,25 +49,19 @@ workflow PREPARE_GENOME { ch_gff = GUNZIP_GFF.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) } else { - ch_gff = file(params.gff) + ch_gff = Channel.value(file(params.gff)) } - } else { - ch_gff = [] } // // Create chromosome sizes file // - ch_fai = 
Channel.empty() - ch_chrom_sizes = Channel.empty() - if (params.protocol == 'amplicon' || !params.skip_asciigenome) { - CUSTOM_GETCHROMSIZES ( - ch_fasta - ) - ch_fai = CUSTOM_GETCHROMSIZES.out.fai - ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes - ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) - } + CUSTOM_GETCHROMSIZES ( + ch_fasta.map { [ [:], it ] } + ) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) // // Prepare reference files required for variant calling @@ -81,13 +76,13 @@ workflow PREPARE_GENOME { ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) } else { - ch_kraken2_db = file(params.kraken2_db) + ch_kraken2_db = Channel.value(file(params.kraken2_db)) } } else { KRAKEN2_BUILD ( params.kraken2_db_name ) - ch_kraken2_db = KRAKEN2_BUILD.out.db + ch_kraken2_db = KRAKEN2_BUILD.out.db.first() ch_versions = ch_versions.mix(KRAKEN2_BUILD.out.versions) } } @@ -107,7 +102,7 @@ workflow PREPARE_GENOME { ch_primer_bed = GUNZIP_PRIMER_BED.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_PRIMER_BED.out.versions) } else { - ch_primer_bed = file(params.primer_bed) + ch_primer_bed = Channel.value(file(params.primer_bed)) } } @@ -130,7 +125,7 @@ workflow PREPARE_GENOME { ch_primer_fasta = GUNZIP_PRIMER_FASTA.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_PRIMER_FASTA.out.versions) } else { - ch_primer_fasta = file(params.primer_fasta) + ch_primer_fasta = Channel.value(file(params.primer_fasta)) } } else { BEDTOOLS_GETFASTA ( @@ -156,11 +151,11 @@ workflow PREPARE_GENOME { ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) } else { - ch_bowtie2_index = file(params.bowtie2_index) + ch_bowtie2_index = Channel.value(file(params.bowtie2_index)) } } else { BOWTIE2_BUILD ( - ch_fasta + ch_fasta.map { [ [:], it ] } ) ch_bowtie2_index = BOWTIE2_BUILD.out.index ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) @@ -180,7 +175,7 @@ workflow PREPARE_GENOME { ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_NEXTCLADE_DB.out.versions) } else { - ch_nextclade_db = file(params.nextclade_dataset) + ch_nextclade_db = Channel.value(file(params.nextclade_dataset)) } } else if (params.nextclade_dataset_name) { NEXTCLADE_DATASETGET ( @@ -207,7 +202,7 @@ workflow PREPARE_GENOME { ch_blast_db = UNTAR_BLAST_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BLAST_DB.out.versions) } else { - ch_blast_db = file(params.blast_db) + ch_blast_db = Channel.value(file(params.blast_db)) } } else { BLAST_MAKEBLASTDB ( @@ -224,7 +219,7 @@ workflow PREPARE_GENOME { // ch_snpeff_db = Channel.empty() ch_snpeff_config = Channel.empty() - if (!params.skip_variants && params.gff && !params.skip_snpeff) { + if (!params.skip_variants && !params.skip_snpeff) { SNPEFF_BUILD ( ch_fasta, ch_gff diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index 208e582b..1153b709 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -2,12 +2,12 @@ // Uncompress and prepare reference genome files // -include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_GFF } from 
'../../modules/nf-core/modules/gunzip/main' -include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/modules/gunzip/main' -include { UNTAR } from '../../modules/nf-core/modules/untar/main' -include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main' -include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/modules/nextclade/datasetget/main' +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/gunzip/main' +include { UNTAR } from '../../modules/nf-core/untar/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/nextclade/datasetget/main' include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' @@ -26,12 +26,13 @@ workflow PREPARE_GENOME { ch_fasta = GUNZIP_FASTA.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) } else { - ch_fasta = file(params.fasta) + ch_fasta = Channel.value(file(params.fasta)) } // // Uncompress GFF annotation file // + ch_gff = Channel.empty() if (params.gff) { if (params.gff.endsWith('.gz')) { GUNZIP_GFF ( @@ -40,20 +41,18 @@ workflow PREPARE_GENOME { ch_gff = GUNZIP_GFF.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) } else { - ch_gff = file(params.gff) + ch_gff = Channel.value(file(params.gff)) } - } else { - ch_gff = [] } // // Create chromosome sizes file // CUSTOM_GETCHROMSIZES ( - ch_fasta + ch_fasta.map { [ [:], it ] } ) - ch_fai = CUSTOM_GETCHROMSIZES.out.fai - ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) // @@ -68,7 +67,7 @@ workflow PREPARE_GENOME { ch_primer_bed = GUNZIP_PRIMER_BED.out.gunzip.map { it[1] } ch_versions = ch_versions.mix(GUNZIP_PRIMER_BED.out.versions) } else { - ch_primer_bed = file(params.primer_bed) + ch_primer_bed = Channel.value(file(params.primer_bed)) } } @@ -99,7 +98,7 @@ workflow PREPARE_GENOME { ch_nextclade_db = UNTAR.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR.out.versions) } else { - ch_nextclade_db = file(params.nextclade_dataset) + ch_nextclade_db = Channel.value(file(params.nextclade_dataset)) } } else if (params.nextclade_dataset_name) { NEXTCLADE_DATASETGET ( @@ -117,7 +116,7 @@ workflow PREPARE_GENOME { // ch_snpeff_db = Channel.empty() ch_snpeff_config = Channel.empty() - if (params.gff && !params.skip_snpeff) { + if (!params.skip_snpeff) { SNPEFF_BUILD ( ch_fasta, ch_gff diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf index ad84feb3..92a6a669 100644 --- a/subworkflows/local/snpeff_snpsift.nf +++ b/subworkflows/local/snpeff_snpsift.nf @@ -5,7 +5,7 @@ include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' +include { VCF_BGZIP_TABIX_STATS } from './vcf_bgzip_tabix_stats' workflow SNPEFF_SNPSIFT { take: @@ -27,7 +27,10 @@ workflow SNPEFF_SNPSIFT { ch_versions = ch_versions.mix(SNPEFF_ANN.out.versions.first()) VCF_BGZIP_TABIX_STATS ( - SNPEFF_ANN.out.vcf + 
SNPEFF_ANN.out.vcf, + [], + [], + [] ) ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) @@ -43,6 +46,7 @@ workflow SNPEFF_SNPSIFT { vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_BGZIP_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 868ff522..103ba725 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -2,9 +2,9 @@ // Variant calling with BCFTools, downstream processing and QC // -include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/modules/bcftools/mpileup/main' -include { BCFTOOLS_NORM } from '../../modules/nf-core/modules/bcftools/norm/main' -include { VCF_TABIX_STATS } from '../nf-core/vcf_tabix_stats' +include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/bcftools/mpileup/main' +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { VCF_TABIX_STATS } from './vcf_tabix_stats' include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_BCFTOOLS { @@ -25,7 +25,7 @@ workflow VARIANTS_BCFTOOLS { // Call variants // BCFTOOLS_MPILEUP ( - bam, + bam.map{ meta, bam_file -> [ meta, bam_file, [] ] }, fasta, params.save_mpileup ) @@ -62,7 +62,10 @@ workflow VARIANTS_BCFTOOLS { ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) VCF_TABIX_STATS ( - BCFTOOLS_NORM.out.vcf + BCFTOOLS_NORM.out.vcf, + [], + [], + [] ) ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) @@ -89,6 +92,7 @@ workflow VARIANTS_BCFTOOLS { vcf = BCFTOOLS_NORM.out.vcf // channel: [ val(meta), [ vcf ] ] tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index fee5c54c..26e460aa 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -2,10 +2,10 @@ // Variant calling with IVar, downstream processing and QC // -include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' +include { IVAR_VARIANTS } from '../../modules/nf-core/ivar/variants/main' include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' -include { BCFTOOLS_SORT } from '../../modules/nf-core/modules/bcftools/sort/main' -include { VCF_TABIX_STATS } from '../nf-core/vcf_tabix_stats' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' +include { VCF_TABIX_STATS } from './vcf_tabix_stats' include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_IVAR { @@ -59,7 +59,10 @@ workflow VARIANTS_IVAR { ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) VCF_TABIX_STATS ( - BCFTOOLS_SORT.out.vcf + BCFTOOLS_SORT.out.vcf, + [], + [], + [] ) ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) @@ -88,6 +91,7 @@ workflow VARIANTS_IVAR { vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), [ vcf ] ] tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] stats = 
VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] diff --git a/subworkflows/local/variants_long_table.nf b/subworkflows/local/variants_long_table.nf index 938f3f3b..a3ad6b0b 100644 --- a/subworkflows/local/variants_long_table.nf +++ b/subworkflows/local/variants_long_table.nf @@ -2,7 +2,7 @@ // Create a long table with variant information including AA changes and lineage info // -include { BCFTOOLS_QUERY } from '../../modules/nf-core/modules/bcftools/query/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' include { MAKE_VARIANTS_LONG_TABLE } from '../../modules/local/make_variants_long_table' workflow VARIANTS_LONG_TABLE { diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/local/vcf_bgzip_tabix_stats.nf similarity index 64% rename from subworkflows/nf-core/vcf_bgzip_tabix_stats.nf rename to subworkflows/local/vcf_bgzip_tabix_stats.nf index 67e4f992..60f1b274 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/local/vcf_bgzip_tabix_stats.nf @@ -2,12 +2,15 @@ // Run BCFTools bgzip, tabix and stats commands // -include { TABIX_BGZIP } from '../../modules/nf-core/modules/tabix/bgzip/main' +include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' include { VCF_TABIX_STATS } from './vcf_tabix_stats' workflow VCF_BGZIP_TABIX_STATS { take: - vcf // channel: [ val(meta), [ vcf ] ] + vcf // channel: [ val(meta), [ vcf ] ] + regions // file: regions.txt + targets // file: targets.txt + samples // file: samples.txt main: @@ -19,13 +22,18 @@ workflow VCF_BGZIP_TABIX_STATS { ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) VCF_TABIX_STATS ( - TABIX_BGZIP.out.output + TABIX_BGZIP.out.output, + regions, + targets, + samples ) ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) emit: vcf = TABIX_BGZIP.out.output // channel: [ val(meta), [ vcf.gz ] ] + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/nf-core/vcf_tabix_stats.nf b/subworkflows/local/vcf_tabix_stats.nf similarity index 54% rename from subworkflows/nf-core/vcf_tabix_stats.nf rename to subworkflows/local/vcf_tabix_stats.nf index 623ff347..c99321ae 100644 --- a/subworkflows/nf-core/vcf_tabix_stats.nf +++ b/subworkflows/local/vcf_tabix_stats.nf @@ -2,12 +2,15 @@ // Run BCFTools tabix and stats commands // -include { TABIX_TABIX } from '../../modules/nf-core/modules/tabix/tabix/main' -include { BCFTOOLS_STATS } from '../../modules/nf-core/modules/bcftools/stats/main' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../../modules/nf-core/bcftools/stats/main' workflow VCF_TABIX_STATS { take: - vcf // channel: [ val(meta), [ vcf ] ] + vcf // channel: [ val(meta), [ vcf ] ] + regions // file: regions.txt + targets // file: targets.txt + samples // file: samples.txt main: @@ -19,12 +22,17 @@ workflow VCF_TABIX_STATS { ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) BCFTOOLS_STATS ( - vcf + vcf.join(TABIX_TABIX.out.tbi, by: [0]), + regions, + targets, + samples ) ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) emit: tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = TABIX_TABIX.out.csi // channel: [ val(meta), [ csi ] 
] + stats = BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] versions = ch_versions // channel: [ versions.yml ] diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf deleted file mode 100644 index af2f676d..00000000 --- a/subworkflows/nf-core/align_bowtie2.nf +++ /dev/null @@ -1,51 +0,0 @@ -// -// Alignment with Bowtie2 -// - -include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow ALIGN_BOWTIE2 { - take: - reads // channel: [ val(meta), [ reads ] ] - index // channel: /path/to/bowtie2/index/ - save_unaligned // value: boolean - sort_bam // value: boolean - - main: - - ch_versions = Channel.empty() - - // - // Map reads with Bowtie2 - // - BOWTIE2_ALIGN ( - reads, - index, - save_unaligned, - sort_bam - ) - ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS ( - BOWTIE2_ALIGN.out.bam - ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - - emit: - bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] - log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] - fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - csi = BAM_SORT_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf similarity index 59% rename from subworkflows/nf-core/mark_duplicates_picard.nf rename to subworkflows/nf-core/bam_markduplicates_picard/main.nf index 08bb41ba..6e3df332 100644 --- a/subworkflows/nf-core/mark_duplicates_picard.nf +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -2,37 +2,28 @@ // Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats // -include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/modules/picard/markduplicates/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_MARKDUPLICATES_PICARD { -workflow MARK_DUPLICATES_PICARD { take: - bam // channel: [ val(meta), [ bam ] ] + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] main: ch_versions = Channel.empty() - // - // Picard MarkDuplicates - // - PICARD_MARKDUPLICATES ( - bam - ) + PICARD_MARKDUPLICATES ( ch_bam, ch_fasta, ch_fai ) ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) - // - // Index BAM file and run samtools stats, flagstat and idxstats - // - SAMTOOLS_INDEX ( - PICARD_MARKDUPLICATES.out.bam - ) + SAMTOOLS_INDEX ( PICARD_MARKDUPLICATES.out.bam ) ch_versions = 
ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - PICARD_MARKDUPLICATES - .out - .bam + ch_bam_bai = PICARD_MARKDUPLICATES.out.bam .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) .map { @@ -43,22 +34,19 @@ workflow MARK_DUPLICATES_PICARD { [ meta, bam, csi ] } } - .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( - ch_bam_bai - ) + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: - bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] - metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] - - bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] - csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] - stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), path(bam) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 00000000..822c6132 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,60 @@ +name: "bam_markduplicates_picard" +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - markduplicates + - bam + - sam + - cram + +modules: + - picard/markduplicates + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + +input: + - ch_bam: + description: | + BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] +output: + - bam: + description: | + processed BAM/CRAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM/CRAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@dmarron" + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf similarity index 72% rename from subworkflows/nf-core/bam_sort_samtools.nf rename to subworkflows/nf-core/bam_sort_stats_samtools/main.nf index d1e6c74c..617871fe
100644 --- a/subworkflows/nf-core/bam_sort_samtools.nf +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -2,31 +2,26 @@ // Sort, index BAM file and run samtools stats, flagstat and idxstats // -include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' -include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' -workflow BAM_SORT_SAMTOOLS { +workflow BAM_SORT_STATS_SAMTOOLS { take: - ch_bam // channel: [ val(meta), [ bam ] ] + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ fasta ] main: ch_versions = Channel.empty() - SAMTOOLS_SORT ( - ch_bam - ) + SAMTOOLS_SORT ( ch_bam ) ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) - SAMTOOLS_INDEX ( - SAMTOOLS_SORT.out.bam - ) + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) - SAMTOOLS_SORT - .out - .bam + SAMTOOLS_SORT.out.bam .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) .map { @@ -39,9 +34,7 @@ workflow BAM_SORT_SAMTOOLS { } .set { ch_bam_bai } - BAM_STATS_SAMTOOLS ( - ch_bam_bai - ) + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) emit: diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..131065be --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,65 @@ +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +modules: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI BAM/CRAM/SAM index file + pattern: "*.csi" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf deleted file mode 100644 index 68d632c3..00000000 --- a/subworkflows/nf-core/bam_stats_samtools.nf +++ /dev/null @@ -1,38 +0,0 @@ -// -// Run SAMtools stats, flagstat and idxstats -// - -include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' -include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' -include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/modules/samtools/flagstat/main' - -workflow BAM_STATS_SAMTOOLS { - take: - ch_bam_bai // channel: [ val(meta), [ bam ], [bai/csi] ] - - main: - ch_versions = Channel.empty() - - SAMTOOLS_STATS ( - ch_bam_bai, - [] - ) - ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) - - SAMTOOLS_FLAGSTAT ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) - - SAMTOOLS_IDXSTATS ( - ch_bam_bai - ) - ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) - - emit: - stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..c9d7c8b7 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml
b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..b6072686 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,40 @@ +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +modules: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and its index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf new file mode 100644 index 00000000..eaf4ac5e --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -0,0 +1,45 @@ +// +// Alignment with Bowtie2 +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BOWTIE2 { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( ch_reads, ch_index, save_unaligned, sort_bam ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), bam ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml new file mode 100644 index 00000000..ad378077 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -0,0 +1,63 @@ +name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +modules: + - bowtie2/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - meta: +
type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - ch_index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + description: | + Use samtools sort (true) or samtools view (false) when producing the output BAM + (default: false) + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" +# TODO Add samtools outputs +authors: + - "@drpatelh" diff --git a/subworkflows/nf-core/primer_trim_ivar.nf b/subworkflows/nf-core/primer_trim_ivar.nf deleted file mode 100644 index e3046bb0..00000000 --- a/subworkflows/nf-core/primer_trim_ivar.nf +++ /dev/null @@ -1,45 +0,0 @@ -// -// iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats -// - -include { IVAR_TRIM } from '../../modules/nf-core/modules/ivar/trim/main' -include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' - -workflow PRIMER_TRIM_IVAR { - take: - bam // channel: [ val(meta), [ bam ], [bai] ] - bed // path : bed - - main: - - ch_versions = Channel.empty() - - // - // iVar trim primers - // - IVAR_TRIM ( - bam, - bed - ) - ch_versions = ch_versions.mix(IVAR_TRIM.out.versions.first()) - - // - // Sort, index BAM file and run samtools stats, flagstat and idxstats - // - BAM_SORT_SAMTOOLS ( - IVAR_TRIM.out.bam - ) - ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) - - emit: - bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] - log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] - - bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] - bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] - stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] - flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] - idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] - - versions = ch_versions // channel: [ versions.yml ] -} diff --git a/tower.yml b/tower.yml new file mode 100644 index 00000000..68f9e947 --- /dev/null +++ b/tower.yml @@ -0,0 +1,17 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + summary_variants_metrics_mqc.csv: + display: "Summary variant calling metrics CSV file" + summary_assembly_metrics_mqc.csv: + display: "Summary assembly metrics CSV file" + variants_long_table.csv: + display: "Variants long table with functional effect prediction and lineage analysis" + all_samples.mosdepth.heatmap.pdf: + display: "All samples amplicon coverage heatmap PDF file" + report.pdf: + display: "QUAST PDF report" + "*.mosdepth.coverage.pdf": + display: "Per-sample amplicon coverage PDF file" + "**/bandage/*.png": + display: "Assembly BANDAGE image" diff --git a/workflows/illumina.nf b/workflows/illumina.nf index f4010465..7d44924f ---
a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ def valid_params = [ @@ -34,9 +34,9 @@ def variant_caller = params.variant_caller if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ch_multiqc_config = file("$projectDir/assets/multiqc_config_illumina.yml", checkIfExists: true) @@ -47,9 +47,9 @@ ch_blast_outfmt6_header = file("$projectDir/assets/headers/blast_outfmt6_hea ch_ivar_variants_header_mqc = file("$projectDir/assets/headers/ivar_variants_header_mqc.txt", checkIfExists: true) /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -59,53 +59,50 @@ include { CUTADAPT } from '../modules/local/cutadapt' include { MULTIQC } from '../modules/local/multiqc_illumina' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_READS } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_FAIL_MAPPED } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_NEXTCLADE } from '../modules/local/multiqc_tsv_from_list' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { INPUT_CHECK } from '../subworkflows/local/input_check' -include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' -include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' -include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' -include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' -include { CONSENSUS_BCFTOOLS } from '../subworkflows/local/consensus_bcftools' -include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' -include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades' -include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler' -include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' +include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' +include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' +include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' +include { CONSENSUS_BCFTOOLS } from 
'../subworkflows/local/consensus_bcftools' +include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades' +include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler' +include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' +include { BAM_TRIM_PRIMERS_IVAR } from '../subworkflows/local/bam_trim_primers_ivar' +include { FASTQ_TRIM_FASTP_FASTQC } from '../subworkflows/local/fastq_trim_fastp_fastqc' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // MODULE: Installed directly from nf-core/modules // -include { CAT_FASTQ } from '../modules/nf-core/modules/cat/fastq/main' -include { FASTQC } from '../modules/nf-core/modules/fastqc/main' -include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/modules/kraken2/kraken2/main' -include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/modules/picard/collectmultiplemetrics/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' -include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/modules/mosdepth/main' -include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/modules/mosdepth/main' +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/kraken2/kraken2/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/mosdepth/main' +include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main' // // SUBWORKFLOW: Consisting entirely of nf-core/modules // -include { FASTQC_FASTP } from '../subworkflows/nf-core/fastqc_fastp' -include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' -include { PRIMER_TRIM_IVAR } from '../subworkflows/nf-core/primer_trim_ivar' -include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // Info required for completion email and summary @@ -198,13 +195,14 @@ workflow ILLUMINA { // // SUBWORKFLOW: Read QC and trim adapters // - FASTQC_FASTP ( + FASTQ_TRIM_FASTP_FASTQC ( ch_cat_fastq, + [], params.save_trimmed_fail, false ) - ch_variants_fastq = FASTQC_FASTP.out.reads - ch_versions = ch_versions.mix(FASTQC_FASTP.out.versions) + ch_variants_fastq = FASTQ_TRIM_FASTP_FASTQC.out.reads + ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions) // // Filter empty FastQ files after 
adapter trimming @@ -212,7 +210,7 @@ workflow ILLUMINA { ch_fail_reads_multiqc = Channel.empty() if (!params.skip_fastp) { ch_variants_fastq - .join(FASTQC_FASTP.out.trim_json) + .join(FASTQ_TRIM_FASTP_FASTQC.out.trim_json) .map { meta, reads, json -> pass = WorkflowIllumina.getFastpReadsAfterFiltering(json) > 0 @@ -233,14 +231,13 @@ workflow ILLUMINA { return [ "$meta.id\t$num_reads" ] } } - .set { ch_pass_fail_reads } - - MULTIQC_TSV_FAIL_READS ( - ch_pass_fail_reads.collect(), - ['Sample', 'Reads before trimming'], - 'fail_mapped_reads' - ) - .set { ch_fail_reads_multiqc } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Reads before trimming'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_fail_reads_multiqc } } // @@ -275,17 +272,18 @@ workflow ILLUMINA { ch_bowtie2_multiqc = Channel.empty() ch_bowtie2_flagstat_multiqc = Channel.empty() if (!params.skip_variants) { - ALIGN_BOWTIE2 ( + FASTQ_ALIGN_BOWTIE2 ( ch_variants_fastq, PREPARE_GENOME.out.bowtie2_index, params.save_unaligned, - false + false, + PREPARE_GENOME.out.fasta ) - ch_bam = ALIGN_BOWTIE2.out.bam - ch_bai = ALIGN_BOWTIE2.out.bai - ch_bowtie2_multiqc = ALIGN_BOWTIE2.out.log_out - ch_bowtie2_flagstat_multiqc = ALIGN_BOWTIE2.out.flagstat - ch_versions = ch_versions.mix(ALIGN_BOWTIE2.out.versions) + ch_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_bai = FASTQ_ALIGN_BOWTIE2.out.bai + ch_bowtie2_multiqc = FASTQ_ALIGN_BOWTIE2.out.log_out + ch_bowtie2_flagstat_multiqc = FASTQ_ALIGN_BOWTIE2.out.flagstat + ch_versions = ch_versions.mix(FASTQ_ALIGN_BOWTIE2.out.versions) } // @@ -318,12 +316,15 @@ workflow ILLUMINA { } .set { ch_pass_fail_mapped } - MULTIQC_TSV_FAIL_MAPPED ( - ch_pass_fail_mapped.fail.collect(), - ['Sample', 'Mapped reads'], - 'fail_mapped_samples' - ) - .set { ch_fail_mapping_multiqc } + ch_pass_fail_mapped + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Mapped reads'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_fail_mapping_multiqc } } // @@ -331,14 +332,15 @@ workflow ILLUMINA { // ch_ivar_trim_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_ivar_trim && params.protocol == 'amplicon') { - PRIMER_TRIM_IVAR ( + BAM_TRIM_PRIMERS_IVAR ( ch_bam.join(ch_bai, by: [0]), - PREPARE_GENOME.out.primer_bed + PREPARE_GENOME.out.primer_bed, + PREPARE_GENOME.out.fasta ) - ch_bam = PRIMER_TRIM_IVAR.out.bam - ch_bai = PRIMER_TRIM_IVAR.out.bai - ch_ivar_trim_flagstat_multiqc = PRIMER_TRIM_IVAR.out.flagstat - ch_versions = ch_versions.mix(PRIMER_TRIM_IVAR.out.versions) + ch_bam = BAM_TRIM_PRIMERS_IVAR.out.bam + ch_bai = BAM_TRIM_PRIMERS_IVAR.out.bai + ch_ivar_trim_flagstat_multiqc = BAM_TRIM_PRIMERS_IVAR.out.flagstat + ch_versions = ch_versions.mix(BAM_TRIM_PRIMERS_IVAR.out.versions) } // @@ -346,13 +348,15 @@ workflow ILLUMINA { // ch_markduplicates_flagstat_multiqc = Channel.empty() if (!params.skip_variants && !params.skip_markduplicates) { - MARK_DUPLICATES_PICARD ( - ch_bam + BAM_MARKDUPLICATES_PICARD ( + ch_bam, + PREPARE_GENOME.out.fasta, + PREPARE_GENOME.out.fai ) - ch_bam = MARK_DUPLICATES_PICARD.out.bam - ch_bai = MARK_DUPLICATES_PICARD.out.bai - ch_markduplicates_flagstat_multiqc = MARK_DUPLICATES_PICARD.out.flagstat - ch_versions = ch_versions.mix(MARK_DUPLICATES_PICARD.out.versions) + ch_bam = BAM_MARKDUPLICATES_PICARD.out.bam + ch_bai = BAM_MARKDUPLICATES_PICARD.out.bai + ch_markduplicates_flagstat_multiqc = BAM_MARKDUPLICATES_PICARD.out.flagstat + ch_versions = 
ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions) } // @@ -360,9 +364,9 @@ workflow ILLUMINA { // if (!params.skip_variants && !params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( - ch_bam, - PREPARE_GENOME.out.fasta, - [] + ch_bam.join(ch_bai, by: [0]), + PREPARE_GENOME.out.fasta.map { [ [:], it ] }, + [ [:], [] ] ) ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first().ifEmpty(null)) } @@ -375,8 +379,8 @@ workflow ILLUMINA { if (!params.skip_variants && !params.skip_mosdepth) { MOSDEPTH_GENOME ( ch_bam.join(ch_bai, by: [0]), - [], - [] + [ [:], [] ], + [ [:], [] ] ) ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null)) @@ -389,8 +393,8 @@ workflow ILLUMINA { if (params.protocol == 'amplicon') { MOSDEPTH_AMPLICON ( ch_bam.join(ch_bai, by: [0]), - PREPARE_GENOME.out.primer_collapsed_bed, - [] + PREPARE_GENOME.out.primer_collapsed_bed.map { [ [:], it ] }, + [ [:], [] ] ) ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null)) @@ -415,9 +419,9 @@ workflow ILLUMINA { VARIANTS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, - (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.fai : [], - (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [], - PREPARE_GENOME.out.gff, + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.fai : [], + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.chrom_sizes : [], + params.gff ? PREPARE_GENOME.out.gff : [], (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], PREPARE_GENOME.out.snpeff_db, PREPARE_GENOME.out.snpeff_config, @@ -439,8 +443,8 @@ workflow ILLUMINA { VARIANTS_BCFTOOLS ( ch_bam, PREPARE_GENOME.out.fasta, - (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [], - PREPARE_GENOME.out.gff, + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.chrom_sizes : [], + params.gff ? PREPARE_GENOME.out.gff : [], (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], PREPARE_GENOME.out.snpeff_db, PREPARE_GENOME.out.snpeff_config @@ -463,7 +467,7 @@ workflow ILLUMINA { CONSENSUS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + params.gff ? PREPARE_GENOME.out.gff : [], PREPARE_GENOME.out.nextclade_db ) @@ -482,7 +486,7 @@ workflow ILLUMINA { ch_vcf, ch_tbi, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + params.gff ? PREPARE_GENOME.out.gff : [], PREPARE_GENOME.out.nextclade_db ) @@ -502,14 +506,13 @@ workflow ILLUMINA { def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] return [ "$meta.id\t$clade" ] } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'clade'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } .set { ch_nextclade_multiqc } - - MULTIQC_TSV_NEXTCLADE ( - ch_nextclade_multiqc.collect(), - ['Sample', 'clade'], - 'nextclade_clade' - ) - .set { ch_nextclade_multiqc } } // @@ -556,7 +559,7 @@ workflow ILLUMINA { params.spades_mode, ch_spades_hmm, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + params.gff ? 
PREPARE_GENOME.out.gff : [], PREPARE_GENOME.out.blast_db, ch_blast_outfmt6_header ) @@ -572,7 +575,7 @@ workflow ILLUMINA { ASSEMBLY_UNICYCLER ( ch_assembly_fastq.map { meta, fastq -> [ meta, fastq, [] ] }, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + params.gff ? PREPARE_GENOME.out.gff : [], PREPARE_GENOME.out.blast_db, ch_blast_outfmt6_header ) @@ -588,7 +591,7 @@ workflow ILLUMINA { ASSEMBLY_MINIA ( ch_assembly_fastq, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + params.gff ? PREPARE_GENOME.out.gff : [], PREPARE_GENOME.out.blast_db, ch_blast_outfmt6_header ) @@ -615,11 +618,11 @@ workflow ILLUMINA { ch_multiqc_custom_config, CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(), ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), - ch_fail_reads_multiqc.ifEmpty([]), - ch_fail_mapping_multiqc.ifEmpty([]), + ch_fail_reads_multiqc.collectFile(name: 'fail_mapped_reads_mqc.tsv').ifEmpty([]), + ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv').ifEmpty([]), ch_amplicon_heatmap_multiqc.ifEmpty([]), - FASTQC_FASTP.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]), - FASTQC_FASTP.out.trim_json.collect{it[1]}.ifEmpty([]), + FASTQ_TRIM_FASTP_FASTQC.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]), + FASTQ_TRIM_FASTP_FASTQC.out.trim_json.collect{it[1]}.ifEmpty([]), ch_kraken2_multiqc.collect{it[1]}.ifEmpty([]), ch_bowtie2_flagstat_multiqc.collect{it[1]}.ifEmpty([]), ch_bowtie2_multiqc.collect{it[1]}.ifEmpty([]), @@ -631,7 +634,7 @@ workflow ILLUMINA { ch_snpeff_multiqc.collect{it[1]}.ifEmpty([]), ch_quast_multiqc.collect().ifEmpty([]), ch_pangolin_multiqc.collect{it[1]}.ifEmpty([]), - ch_nextclade_multiqc.collect().ifEmpty([]), + ch_nextclade_multiqc.collectFile(name: 'nextclade_clade_mqc.tsv').ifEmpty([]), ch_cutadapt_multiqc.collect{it[1]}.ifEmpty([]), ch_spades_quast_multiqc.collect().ifEmpty([]), ch_unicycler_quast_multiqc.collect().ifEmpty([]), @@ -642,9 +645,9 @@ workflow ILLUMINA { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow.onComplete { @@ -655,7 +658,7 @@ workflow.onComplete { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 9bb63231..69854472 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ def valid_params = [ @@ -33,18 +33,18 @@ if (params.artic_minion_caller == 'medaka') { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES 
-======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ch_multiqc_config = file("$projectDir/assets/multiqc_config_nanopore.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config) : [] /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -54,11 +54,6 @@ include { ASCIIGENOME } from '../modules/local/asciigenome' include { MULTIQC } from '../modules/local/multiqc_nanopore' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_NO_SAMPLE_NAME } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_NO_BARCODES } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_BARCODE_COUNT } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_GUPPYPLEX_COUNT } from '../modules/local/multiqc_tsv_from_list' -include { MULTIQC_TSV_FROM_LIST as MULTIQC_TSV_NEXTCLADE } from '../modules/local/multiqc_tsv_from_list' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -67,39 +62,35 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check' include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_nanopore' include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // // MODULE: Installed directly from nf-core/modules // -include { PYCOQC } from '../modules/nf-core/modules/pycoqc/main' -include { NANOPLOT } from '../modules/nf-core/modules/nanoplot/main' -include { ARTIC_GUPPYPLEX } from '../modules/nf-core/modules/artic/guppyplex/main' -include { ARTIC_MINION } from '../modules/nf-core/modules/artic/minion/main' -include { VCFLIB_VCFUNIQ } from '../modules/nf-core/modules/vcflib/vcfuniq/main' -include { TABIX_TABIX } from '../modules/nf-core/modules/tabix/tabix/main' -include { BCFTOOLS_STATS } from '../modules/nf-core/modules/bcftools/stats/main' -include { QUAST } from '../modules/nf-core/modules/quast/main' -include { PANGOLIN } from '../modules/nf-core/modules/pangolin/main' -include { NEXTCLADE_RUN } from '../modules/nf-core/modules/nextclade/run/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' -include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/modules/mosdepth/main' -include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/modules/mosdepth/main' 
- -// -// SUBWORKFLOW: Consisting entirely of nf-core/modules -// -include { FILTER_BAM_SAMTOOLS } from '../subworkflows/nf-core/filter_bam_samtools' +include { PYCOQC } from '../modules/nf-core/pycoqc/main' +include { NANOPLOT } from '../modules/nf-core/nanoplot/main' +include { ARTIC_GUPPYPLEX } from '../modules/nf-core/artic/guppyplex/main' +include { ARTIC_MINION } from '../modules/nf-core/artic/minion/main' +include { VCFLIB_VCFUNIQ } from '../modules/nf-core/vcflib/vcfuniq/main' +include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main' +include { QUAST } from '../modules/nf-core/quast/main' +include { PANGOLIN } from '../modules/nf-core/pangolin/main' +include { NEXTCLADE_RUN } from '../modules/nf-core/nextclade/run/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/mosdepth/main' +include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // Info required for completion email and summary @@ -117,7 +108,7 @@ workflow NANOPORE { ch_pycoqc_multiqc = Channel.empty() if (params.sequencing_summary && !params.skip_pycoqc) { PYCOQC ( - ch_sequencing_summary + Channel.of(ch_sequencing_summary).map { [ [:], it ] } ) ch_pycoqc_multiqc = PYCOQC.out.json ch_versions = ch_versions.mix(PYCOQC.out.versions) @@ -189,14 +180,13 @@ workflow NANOPORE { .filter { it[1] == null } .filter { it[-1] >= params.min_barcode_reads } .map { it -> [ "${it[0]}\t${it[-1]}" ] } - .set { ch_barcodes_no_sample } - - MULTIQC_TSV_NO_SAMPLE_NAME ( - ch_barcodes_no_sample.collect(), - ['Barcode', 'Read count'], - 'fail_barcodes_no_sample' - ) - .set { ch_custom_no_sample_name_multiqc } + .collect() + .map { + tsv_data -> + def header = ['Barcode', 'Read count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_no_sample_name_multiqc } // // MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes @@ -204,14 +194,13 @@ workflow NANOPORE { ch_fastq_dirs .filter { it[-1] == null } .map { it -> [ "${it[1]}\t${it[0]}" ] } - .set { ch_samples_no_barcode } - - MULTIQC_TSV_NO_BARCODES ( - ch_samples_no_barcode.collect(), - ['Sample', 'Missing barcode'], - 'fail_no_barcode_samples' - ) - .set { ch_custom_no_barcodes_multiqc } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Missing barcode'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_no_barcodes_multiqc } ch_fastq_dirs .filter { (it[1] != null) } @@ -247,11 +236,15 @@ workflow NANOPORE { } .set { ch_pass_fail_barcode_count } - MULTIQC_TSV_BARCODE_COUNT ( - ch_pass_fail_barcode_count.fail.collect(), - ['Sample', 'Barcode count'], - 'fail_barcode_count_samples' - ) + ch_pass_fail_barcode_count + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Barcode count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_fail_barcodes_count_multiqc } // Re-arrange channels to have meta map of information for sample ch_fastq_dirs @@ -282,11 +275,15 @@ 
workflow NANOPORE { } .set { ch_pass_fail_guppyplex_count } - MULTIQC_TSV_GUPPYPLEX_COUNT ( - ch_pass_fail_guppyplex_count.fail.collect(), - ['Sample', 'Read count'], - 'fail_guppyplex_count_samples' - ) + ch_pass_fail_guppyplex_count + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Read count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_fail_guppyplex_count_multiqc } // // MODULE: Nanoplot QC for FastQ files @@ -305,8 +302,8 @@ workflow NANOPORE { ARTIC_GUPPYPLEX.out.fastq.filter { it[-1].countFastq() > params.min_guppyplex_reads }, ch_fast5_dir, ch_sequencing_summary, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.primer_bed, + PREPARE_GENOME.out.fasta.collect(), + PREPARE_GENOME.out.primer_bed.collect(), ch_medaka_model.collect().ifEmpty([]), params.artic_minion_medaka_model ?: '', params.artic_scheme, @@ -334,7 +331,10 @@ workflow NANOPORE { // MODULE: VCF stats with bcftools stats // BCFTOOLS_STATS ( - VCFLIB_VCFUNIQ.out.vcf + VCFLIB_VCFUNIQ.out.vcf.join(TABIX_TABIX.out.tbi, by: [0]), + [], + [], + [] ) ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first().ifEmpty(null)) @@ -356,8 +356,8 @@ workflow NANOPORE { MOSDEPTH_GENOME ( ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), - [], - [] + [ [:], [] ], + [ [:], [] ] ) ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null)) @@ -369,8 +369,8 @@ workflow NANOPORE { MOSDEPTH_AMPLICON ( ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), - PREPARE_GENOME.out.primer_collapsed_bed, - [] + PREPARE_GENOME.out.primer_collapsed_bed.map { [ [:], it ] }.collect(), + [ [:], [] ] ) ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null)) @@ -400,7 +400,7 @@ workflow NANOPORE { if (!params.skip_nextclade) { NEXTCLADE_RUN ( ARTIC_MINION.out.fasta, - PREPARE_GENOME.out.nextclade_db + PREPARE_GENOME.out.nextclade_db.collect() ) ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first().ifEmpty(null)) @@ -410,18 +410,18 @@ workflow NANOPORE { NEXTCLADE_RUN .out .csv - .map { meta, csv -> - def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] - return [ "$meta.id\t$clade" ] + .map { + meta, csv -> + def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] + return [ "$meta.id\t$clade" ] + } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'clade'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) } .set { ch_nextclade_multiqc } - - MULTIQC_TSV_NEXTCLADE ( - ch_nextclade_multiqc.collect(), - ['Sample', 'clade'], - 'nextclade_clade' - ) - .set { ch_nextclade_multiqc } } // @@ -431,8 +431,8 @@ workflow NANOPORE { if (!params.skip_variants_quast) { QUAST ( ARTIC_MINION.out.fasta.collect{ it[1] }, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.gff, + PREPARE_GENOME.out.fasta.collect(), + params.gff ? 
PREPARE_GENOME.out.gff : [], true, params.gff ) @@ -448,9 +448,9 @@ workflow NANOPORE { if (params.gff && !params.skip_snpeff) { SNPEFF_SNPSIFT ( VCFLIB_VCFUNIQ.out.vcf, - PREPARE_GENOME.out.snpeff_db, - PREPARE_GENOME.out.snpeff_config, - PREPARE_GENOME.out.fasta + PREPARE_GENOME.out.snpeff_db.collect(), + PREPARE_GENOME.out.snpeff_config.collect(), + PREPARE_GENOME.out.fasta.collect() ) ch_snpeff_multiqc = SNPEFF_SNPSIFT.out.csv ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt @@ -475,10 +475,10 @@ workflow NANOPORE { ASCIIGENOME ( ch_asciigenome, - PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.chrom_sizes, - PREPARE_GENOME.out.gff, - PREPARE_GENOME.out.primer_bed, + PREPARE_GENOME.out.fasta.collect(), + PREPARE_GENOME.out.chrom_sizes.collect(), + params.gff ? PREPARE_GENOME.out.gff : [], + PREPARE_GENOME.out.primer_bed.collect(), params.asciigenome_window_size, params.asciigenome_read_depth ) @@ -517,12 +517,12 @@ workflow NANOPORE { ch_multiqc_custom_config, CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(), ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), - ch_custom_no_sample_name_multiqc.ifEmpty([]), - ch_custom_no_barcodes_multiqc.ifEmpty([]), - MULTIQC_TSV_BARCODE_COUNT.out.ifEmpty([]), - MULTIQC_TSV_GUPPYPLEX_COUNT.out.ifEmpty([]), + ch_custom_no_sample_name_multiqc.collectFile(name: 'fail_barcodes_no_sample_mqc.tsv').ifEmpty([]), + ch_custom_no_barcodes_multiqc.collectFile(name: 'fail_no_barcode_samples_mqc.tsv').ifEmpty([]), + ch_custom_fail_barcodes_count_multiqc.collectFile(name: 'fail_barcode_count_samples_mqc.tsv').ifEmpty([]), + ch_custom_fail_guppyplex_count_multiqc.collectFile(name: 'fail_guppyplex_count_samples_mqc.tsv').ifEmpty([]), ch_amplicon_heatmap_multiqc.ifEmpty([]), - ch_pycoqc_multiqc.collect().ifEmpty([]), + ch_pycoqc_multiqc.collect{it[1]}.ifEmpty([]), ARTIC_MINION.out.json.collect{it[1]}.ifEmpty([]), FILTER_BAM_SAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), BCFTOOLS_STATS.out.stats.collect{it[1]}.ifEmpty([]), @@ -530,16 +530,16 @@ workflow NANOPORE { ch_quast_multiqc.collect().ifEmpty([]), ch_snpeff_multiqc.collect{it[1]}.ifEmpty([]), ch_pangolin_multiqc.collect{it[1]}.ifEmpty([]), - ch_nextclade_multiqc.collect().ifEmpty([]) + ch_nextclade_multiqc.collectFile(name: 'nextclade_clade_mqc.tsv').ifEmpty([]) ) multiqc_report = MULTIQC.out.report.toList() } } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow.onComplete { @@ -550,7 +550,7 @@ workflow.onComplete { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */
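The local VCF_TABIX_STATS and VCF_BGZIP_TABIX_STATS subworkflows above gain three new take: slots (regions, targets, samples) that are forwarded straight to BCFTOOLS_STATS. Callers that do not need bcftools stats filtering can fill the slots with empty placeholders, mirroring the direct BCFTOOLS_STATS call in nanopore.nf. A minimal invocation sketch (the ch_vcf channel name is hypothetical):

    // Hypothetical caller passing empty placeholders: no regions/targets/samples
    // filtering is applied when bcftools stats runs on each indexed VCF.
    VCF_TABIX_STATS (
        ch_vcf, // channel: [ val(meta), [ vcf.gz ] ]
        [],     // regions file, or [] to skip
        [],     // targets file, or [] to skip
        []      // samples file, or [] to skip
    )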
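Both illumina.nf and nanopore.nf also drop the MULTIQC_TSV_FROM_LIST local module in favour of a plain channel operation: the rows are collected, passed to WorkflowCommons.multiqcTsvFromList(tsv_data, header), and the resulting string is written out with collectFile(name: '*_mqc.tsv') when handed to MULTIQC. The helper itself lives in lib/WorkflowCommons.groovy and is not part of this diff; a minimal sketch, assuming it only joins a header row and pre-formatted rows into tab-separated text, would be:

    // Sketch of the helper assumed by the hunks above (the real version lives
    // in lib/WorkflowCommons.groovy): build one TSV string from a list of
    // pre-formatted rows plus a header row, for MultiQC custom content.
    public static String multiqcTsvFromList(tsv_data, header) {
        def tsv_string = ""
        if (tsv_data.size() > 0) {
            tsv_string += "${header.join('\t')}\n" // header row
            tsv_string += tsv_data.join('\n')      // one row per sample/barcode
        }
        return tsv_string
    }

When no sample fails, the upstream collect() emits nothing, so collectFile() never produces a *_mqc.tsv file and the ifEmpty([]) fallback hands MULTIQC an empty input instead.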
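Finally, the repeated change from bare [] placeholders to [ [:], [] ] (in the MOSDEPTH_GENOME, MOSDEPTH_AMPLICON, PICARD_COLLECTMULTIPLEMETRICS and PYCOQC calls) reflects the updated nf-core modules declaring every input as a tuple of a meta map plus file(s): an unused optional input still needs an empty meta map, and a bare file channel is wrapped with .map { [ [:], it ] }. The pattern as it appears in the illumina.nf hunk above:

    // Wrap the bare fasta channel in an empty meta map so it matches the
    // module's tuple-style input, and pass an empty (meta, file) tuple for
    // the unused optional input.
    PICARD_COLLECTMULTIPLEMETRICS (
        ch_bam.join(ch_bai, by: [0]),                 // [ val(meta), bam, bai ]
        PREPARE_GENOME.out.fasta.map { [ [:], it ] }, // [ [:], fasta ]
        [ [:], [] ]                                   // empty optional input
    )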