diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4ecfbfe3..b290e090 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -10,15 +10,7 @@ "vscode": { // Set *default* container specific settings.json values on container create. "settings": { - "python.defaultInterpreterPath": "/opt/conda/bin/python", - "python.linting.enabled": true, - "python.linting.pylintEnabled": true, - "python.formatting.autopep8Path": "/opt/conda/bin/autopep8", - "python.formatting.yapfPath": "/opt/conda/bin/yapf", - "python.linting.flake8Path": "/opt/conda/bin/flake8", - "python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle", - "python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle", - "python.linting.pylintPath": "/opt/conda/bin/pylint" + "python.defaultInterpreterPath": "/opt/conda/bin/python" }, // Add the IDs of extensions you want installed when the container is created. diff --git a/.editorconfig b/.editorconfig index 84a786d8..c018cacc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,7 +8,7 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{md,yml,yaml,html,css,scss,js}] +[*.{md,yml,yaml,html,css,scss,js,cff}] indent_size = 2 # These files are edited and tested upstream in nf-core/modules @@ -18,25 +18,20 @@ end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset -indent_size = unset - -[/assets/email*] -indent_size = unset - -# To prevent errors for these test diamond databases -[/assets/test*/*.dmnd] +[/subworkflows/nf-core/**] charset = unset end_of_line = unset insert_final_newline = unset trim_trailing_whitespace = unset indent_style = unset + +[/assets/email*] indent_size = unset -# To prevent errors for these test blastn databases -[/assets/test*/nt_*/*.{ndb,nhr,nin,nog,nos,not,nsq,ntf,nto}] -charset = unset -end_of_line = unset -insert_final_newline = unset -trim_trailing_whitespace = unset +# ignore python and markdown +[*.{py,md}] indent_style = unset -indent_size = unset + +# ignore ro-crate metadata files +[**/ro-crate-metadata.json] +insert_final_newline = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1ff8015f..d246695d 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# sanger-tol/blobtoolkit: Contributing Guidelines +# `sanger-tol/blobtoolkit`: Contributing Guidelines Hi there! Many thanks for taking an interest in improving sanger-tol/blobtoolkit. @@ -16,15 +16,18 @@ If you'd like to write some code for sanger-tol/blobtoolkit, the standard workfl 1. Check that there isn't already an issue about your idea in the [sanger-tol/blobtoolkit issues](https://github.com/sanger-tol/blobtoolkit/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [sanger-tol/blobtoolkit repository](https://github.com/sanger-tol/blobtoolkit) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) -4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +4. Use `nf-core pipelines schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). 5. 
Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged

If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/).

## Tests

-You can optionally test your changes by running the pipeline locally. Then it is recommended to use the `debug` profile to
-receive warnings about process selectors and other debug info. Example: `nextflow run . -profile debug,test,docker --outdir <OUTDIR>`.
+You can optionally test your changes locally by running the pipeline. It is recommended to use the `debug` profile to receive warnings about process selectors and other debug information. Execute all the tests with the following command:
+
+```bash
+nf-test test --profile debug,test,docker --verbose
+```

When you create a pull request with changes, [GitHub Actions](https://github.com/features/actions) will run automatic tests. Typically, pull-requests are only fully reviewed when these tests are passing, though of course we can help out before then.

@@ -34,7 +37,7 @@ There are typically two types of tests that run:

### Lint tests

`nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to.
-To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint <pipeline-directory>` command.
+To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core pipelines lint <pipeline-directory>` command.

If any failures or warnings are encountered, please follow the listed URL for more documentation.

@@ -49,23 +52,23 @@ These tests are run both with the latest available version of `Nextflow` and als

:warning: Only in the unlikely and regretful event of a release happening with a bug.

-- On your own fork, make a new branch `patch` based on `upstream/master`.
+- On your own fork, make a new branch `patch` based on `upstream/main` or `upstream/master`.
- Fix the bug, and bump version (X.Y.Z+1).
-- A PR should be made on `master` from patch to directly this particular bug.
+- Open a pull-request from `patch` to `main`/`master` with the changes.

## Pipeline contribution conventions

-To make the sanger-tol/blobtoolkit code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.
+To make the `sanger-tol/blobtoolkit` code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written.

### Adding a new step

If you wish to contribute a new step, please use the following coding standards:

-1. Define the corresponding input channel into your new process from the expected previous process channel
+1. Define the corresponding input channel into your new process from the expected previous process channel.
2. Write the process block (see below).
3. Define the output channel if needed (see below).
4. Add any new parameters to `nextflow.config` with a default (see below).
-5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool).
+5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool).
6. Add sanity checks and validation for all relevant parameters.
7. Perform local tests to validate that the new code works as expected.
8. If applicable, add a new test command in `.github/workflows/ci.yml`.
@@ -74,15 +77,15 @@ If you wish to contribute a new step, please use the following coding standards:

### Default values

-Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope.
+Parameters should be initialised / defined with default values within the `params` scope in `nextflow.config`.

-Once there, use `nf-core schema build` to add to `nextflow_schema.json`.
+Once there, use `nf-core pipelines schema build` to add to `nextflow_schema.json`.

### Default processes resource requirements

-Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.
+Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generically with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. An nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/main/nf_core/pipeline-template/conf/base.config), which has the default process as a single-core process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels.

-The process resources can be passed on to the tool dynamically within the process with the `${task.cpu}` and `${task.memory}` variables in the `script:` block.
+The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block.

### Naming schemes

@@ -93,7 +96,7 @@ Please use the following naming schemes, to make it easy to understand what is g

### Nextflow version bumping

-If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]`
+If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core pipelines bump-version --nextflow . [min-nf-version]`

### Images and figures

diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index b78be63e..63652db4 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -8,15 +8,15 @@ These are the most common things requested on pull requests (PRs).

Remember that PRs should be made against the dev branch, unless you're preparing a pipeline release.
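[Editor's aside — picking up the `${task.cpus}` / `${task.memory}` convention from CONTRIBUTING.md above: a minimal sketch of a `script:` block that consumes these variables. The tool name and its flags are hypothetical placeholders, not taken from this pipeline.]

```bash
# Inside a Nextflow process 'script:' block, ${task.cpus} and ${task.memory}
# are interpolated by Nextflow from the process's resolved resource requirements.
# 'some_tool' and its flags are illustrative placeholders only.
some_tool \
    --threads ${task.cpus} \
    --memory ${task.memory.toGiga()}G \
    input.fasta > output.tsv
```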
-Learn more about contributing: [CONTRIBUTING.md](.github/CONTRIBUTING.md)
+Learn more about contributing: [CONTRIBUTING.md](https://github.com/sanger-tol/blobtoolkit/tree/main/.github/CONTRIBUTING.md)
-->

## PR checklist

- [ ] This comment contains a description of changes (with reason).
- [ ] If you've fixed a bug or added code that should be tested, add tests!
-- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](.github/CONTRIBUTING.md)
-- [ ] Make sure your code lints (`nf-core lint`).
+- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/sanger-tol/blobtoolkit/tree/main/.github/CONTRIBUTING.md)
+- [ ] Make sure your code lints (`nf-core pipelines lint`).
- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
- [ ] Check for unexpected warnings in debug mode (`nextflow run . -profile debug,test,docker --outdir <OUTDIR>`).
- [ ] Usage Documentation in `docs/usage.md` is updated.
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index b26623a3..e342b98e 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -1,15 +1,17 @@
name: nf-core branch protection
-# This workflow is triggered on PRs to main branch on the repository
-# It fails when someone tries to make a PR against the nf-core `main` branch instead of `dev`
+# This workflow is triggered on PRs to `main`/`master` branch on the repository
+# It fails when someone tries to make a PR against the nf-core `main`/`master` branch instead of `dev`
on:
  pull_request_target:
-    branches: [main]
+    branches:
+      - main
+      - master

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
-      # PRs to the nf-core repo main branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
+      # PRs to the nf-core repo main/master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches
      - name: Check PRs
        if: github.repository == 'sanger-tol/blobtoolkit'
        run: |
@@ -19,10 +21,10 @@ jobs:
      # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets
      - name: Post PR comment
        if: failure()
-        uses: mshick/add-pr-comment@v1
+        uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2
        with:
          message: |
-            ## This PR is against the `main` branch :x:
+            ## This PR is against the `${{github.event.pull_request.base.ref}}` branch :x:

            * Do not close this PR
            * Click _Edit_ and change the `base` to `dev`
@@ -32,9 +34,9 @@

            Hi @${{ github.event.pull_request.user.login }},

-            It looks like this pull-request is has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `main` branch.
-            The `main` branch on nf-core repositories should always contain code from the latest release.
-            Because of this, PRs to `main` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch.
+            It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) ${{github.event.pull_request.base.ref}} branch.
+            The ${{github.event.pull_request.base.ref}} branch on nf-core repositories should always contain code from the latest release.
+ Because of this, PRs to ${{github.event.pull_request.base.ref}} are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. Note that even after this, the test will continue to show as failing until you push a new commit. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 589d7118..0cdbc6cd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,9 +7,12 @@ on: pull_request: release: types: [published] + workflow_dispatch: env: NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity concurrency: group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" @@ -17,37 +20,66 @@ concurrency: jobs: test: - name: Run pipeline with test data + name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" # Only run on push if this is the dev branch (merged PRs) if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'sanger-tol/blobtoolkit') }}" runs-on: ubuntu-latest strategy: matrix: NXF_VER: - - "23.04.0" + - "24.04.2" - "latest-everything" + # "conda" is not supported + profile: + - "docker" + - "singularity" + test_name: + - "test" + isMaster: + - ${{ github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "singularity" steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 - - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 with: version: "${{ matrix.NXF_VER }}" - - name: Download the NCBI taxdump database + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' run: | - mkdir ncbi_taxdump - curl -L https://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar -C ncbi_taxdump -xzf - + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - name: Download the BUSCO lineage database + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' run: | - mkdir busco_database - curl -L https://tolit.cog.sanger.ac.uk/test-data/resources/busco/blobtoolkit.GCA_922984935.2.2023-08-03.lineages.tar.gz | tar -C busco_database -xzf - + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - name: Run pipeline with test data - # You can customise CI pipeline run tests as required - # For example: adding multiple test runs with different parameters - # Remember that you can parallelise this by using strategy.matrix + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" run: 
| - nextflow run ${GITHUB_WORKSPACE} -profile test,docker --taxdump $PWD/ncbi_taxdump --busco $PWD/busco_database --outdir ./results + nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results diff --git a/.github/workflows/clean-up.yml b/.github/workflows/clean-up.yml index 694e90ec..0b6b1f27 100644 --- a/.github/workflows/clean-up.yml +++ b/.github/workflows/clean-up.yml @@ -10,7 +10,7 @@ jobs: issues: write pull-requests: write steps: - - uses: actions/stale@v7 + - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9 with: stale-issue-message: "This issue has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment otherwise this issue will be closed in 20 days." stale-pr-message: "This PR has been tagged as awaiting-changes or awaiting-feedback by an nf-core contributor. Remove stale label or add a comment if it is still useful." diff --git a/.github/workflows/download_pipeline.yml b/.github/workflows/download_pipeline.yml new file mode 100644 index 00000000..ab06316e --- /dev/null +++ b/.github/workflows/download_pipeline.yml @@ -0,0 +1,134 @@ +name: Test successful pipeline download with 'nf-core pipelines download' + +# Run the workflow when: +# - dispatched manually +# - when a PR is opened or reopened to main/master branch +# - the head branch of the pull request is updated, i.e. if fixes for a release are pushed last minute to dev. +on: + workflow_dispatch: + inputs: + testbranch: + description: "The specific branch you wish to utilize for the test execution of nf-core pipelines download." + required: true + default: "dev" + pull_request: + types: + - opened + - edited + - synchronize + branches: + - main + - master + pull_request_target: + branches: + - main + - master + +env: + NXF_ANSI_LOG: false + +jobs: + configure: + runs-on: ubuntu-latest + outputs: + REPO_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPO_LOWERCASE }} + REPOTITLE_LOWERCASE: ${{ steps.get_repo_properties.outputs.REPOTITLE_LOWERCASE }} + REPO_BRANCH: ${{ steps.get_repo_properties.outputs.REPO_BRANCH }} + steps: + - name: Get the repository name and current branch + id: get_repo_properties + run: | + echo "REPO_LOWERCASE=${GITHUB_REPOSITORY,,}" >> "$GITHUB_OUTPUT" + echo "REPOTITLE_LOWERCASE=$(basename ${GITHUB_REPOSITORY,,})" >> "$GITHUB_OUTPUT" + echo "REPO_BRANCH=${{ github.event.inputs.testbranch || 'dev' }}" >> "$GITHUB_OUTPUT" + + download: + runs-on: ubuntu-latest + needs: configure + steps: + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: Setup Apptainer + uses: eWaterCycle/setup-apptainer@4bb22c52d4f63406c49e94c804632975787312b3 # v2.0.0 + with: + apptainer-version: 1.3.4 + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install git+https://github.com/nf-core/tools.git@dev + + - name: Make a cache directory for the container images + run: | + mkdir -p ./singularity_container_images + + - name: Download the pipeline + env: + NXF_SINGULARITY_CACHEDIR: ./singularity_container_images + run: | + nf-core pipelines download ${{ needs.configure.outputs.REPO_LOWERCASE }} \ + --revision ${{ needs.configure.outputs.REPO_BRANCH }} \ + --outdir ./${{ 
needs.configure.outputs.REPOTITLE_LOWERCASE }} \
+          --compress "none" \
+          --container-system 'singularity' \
+          --container-library "quay.io" -l "docker.io" -l "community.wave.seqera.io/library/" \
+          --container-cache-utilisation 'amend' \
+          --download-configuration 'yes'
+
+      - name: Inspect download
+        run: tree ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }}
+
+      - name: Inspect container images
+        run: tree ./singularity_container_images | tee ./container_initial
+
+      - name: Count the downloaded number of container images
+        id: count_initial
+        run: |
+          image_count=$(ls -1 ./singularity_container_images | wc -l | xargs)
+          echo "Initial container image count: $image_count"
+          echo "IMAGE_COUNT_INITIAL=$image_count" >> "$GITHUB_OUTPUT"
+
+      - name: Run the downloaded pipeline (stub)
+        id: stub_run_pipeline
+        continue-on-error: true
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./singularity_container_images
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -stub -profile test,singularity --outdir ./results
+      - name: Run the downloaded pipeline (stub run not supported)
+        id: run_pipeline
+        if: ${{ steps.stub_run_pipeline.outcome == 'failure' }}
+        env:
+          NXF_SINGULARITY_CACHEDIR: ./singularity_container_images
+          NXF_SINGULARITY_HOME_MOUNT: true
+        run: nextflow run ./${{ needs.configure.outputs.REPOTITLE_LOWERCASE }}/$( sed 's/\W/_/g' <<< ${{ needs.configure.outputs.REPO_BRANCH }}) -profile test,singularity --outdir ./results
+
+      - name: Count the downloaded number of container images
+        id: count_afterwards
+        run: |
+          image_count=$(ls -1 ./singularity_container_images | wc -l | xargs)
+          echo "Post-pipeline run container image count: $image_count"
+          echo "IMAGE_COUNT_AFTER=$image_count" >> "$GITHUB_OUTPUT"
+
+      - name: Compare container image counts
+        run: |
+          if [ "${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}" -ne "${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}" ]; then
+            initial_count=${{ steps.count_initial.outputs.IMAGE_COUNT_INITIAL }}
+            final_count=${{ steps.count_afterwards.outputs.IMAGE_COUNT_AFTER }}
+            difference=$((final_count - initial_count))
+            echo "$difference additional container images were downloaded at runtime. The pipeline has no support for offline runs!"
+            tree ./singularity_container_images > ./container_afterwards
+            diff ./container_initial ./container_afterwards
+            exit 1
+          else
+            echo "The pipeline can be downloaded successfully!"
+          fi
diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml
index d5f2a971..ff6e275c 100644
--- a/.github/workflows/fix-linting.yml
+++ b/.github/workflows/fix-linting.yml
@@ -4,7 +4,7 @@ on:
  types: [created]

jobs:
-  deploy:
+  fix-linting:
    # Only run if comment is on a PR with the main repo, and if it contains the magic keywords
    if: >
      contains(github.event.comment.html_url, '/pull/') &&
@@ -13,10 +13,17 @@
    runs-on: ubuntu-latest
    steps:
      # Use the @sanger-tolsoft token to check out so we can push later
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4
        with:
          token: ${{ secrets.sangertolsoft_access_token }}

+      # indication that the linting is being fixed
+      - name: React on comment
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: eyes
+
      # Action runs on the issue comment, so we don't get the PR by default
      # Use the gh cli to check out the PR
      - name: Checkout Pull Request
@@ -24,32 +31,59 @@
        env:
          GITHUB_TOKEN: ${{ secrets.sangertolsoft_access_token }}

-      - uses: actions/setup-node@v4
+      # Install and run pre-commit
+      - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5
+        with:
+          python-version: "3.12"

-      - name: Install Prettier
-        run: npm install -g prettier @prettier/plugin-php
+      - name: Install pre-commit
+        run: pip install pre-commit

-      # Check that we actually need to fix something
-      - name: Run 'prettier --check'
-        id: prettier_status
-        run: |
-          if prettier --check ${GITHUB_WORKSPACE}; then
-            echo "result=pass" >> $GITHUB_OUTPUT
-          else
-            echo "result=fail" >> $GITHUB_OUTPUT
-          fi
+      - name: Run pre-commit
+        id: pre-commit
+        run: pre-commit run --all-files
+        continue-on-error: true

-      - name: Run 'prettier --write'
-        if: steps.prettier_status.outputs.result == 'fail'
-        run: prettier --write ${GITHUB_WORKSPACE}
+      # indication that the linting has finished
+      - name: react if linting finished successfully
+        if: steps.pre-commit.outcome == 'success'
+        uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4
+        with:
+          comment-id: ${{ github.event.comment.id }}
+          reactions: "+1"

      - name: Commit & push changes
-        if: steps.prettier_status.outputs.result == 'fail'
+        id: commit-and-push
+        if: steps.pre-commit.outcome == 'failure'
        run: |
          git config user.email "105875386+sanger-tolsoft@users.noreply.github.com"
          git config user.name "sanger-tolsoft"
          git config push.default upstream
          git add .
git status - git commit -m "[automated] Fix linting with Prettier" + git commit -m "[automated] Fix code linting" git push + + - name: react if linting errors were fixed + id: react-if-fixed + if: steps.commit-and-push.outcome == 'success' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: hooray + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + comment-id: ${{ github.event.comment.id }} + reactions: confused + + - name: react if linting errors were not fixed + if: steps.commit-and-push.outcome == 'failure' + uses: peter-evans/create-or-update-comment@71345be0265236311c031f5c7866368bd1eff043 # v4 + with: + issue-number: ${{ github.event.issue.number }} + body: | + @${{ github.actor }} I tried to fix the linting errors, but it didn't work. Please fix them manually. + See [CI log](https://github.com/sanger-tol/blobtoolkit/actions/runs/${{ github.run_id }}) for more details. diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 8d12b98a..dbd52d5a 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,6 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure +# It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: push: @@ -11,87 +11,62 @@ on: types: [published] jobs: - EditorConfig: + pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - - uses: actions/setup-node@v4 - - - name: Install editorconfig-checker - run: npm install -g editorconfig-checker - - - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - - Prettier: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - uses: actions/setup-node@v4 - - - name: Install Prettier - run: npm install -g prettier@3.1.0 - - - name: Run Prettier --check - run: prettier --check ${GITHUB_WORKSPACE} - - PythonBlack: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Check code lints with Black - uses: psf/black@stable - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 + - name: Set up Python 3.12 + uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: - message: | - ## Python linting (`black`) is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` - * Fix formatting errors in your pipeline: `black .` + python-version: "3.12" - Once you push these changes the test should pass, and you can hide this comment :+1: + - name: Install pre-commit + run: pip install pre-commit - We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run pre-commit + run: pre-commit run --all-files nf-core: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@v4 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - name: Install Nextflow - uses: nf-core/setup-nextflow@v1 + uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 with: - python-version: "3.11" + python-version: "3.12" architecture: "x64" + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install nf-core==2.11 + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'master' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - - name: Run nf-core lint + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'master' }} env: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md - name: Save PR number if: ${{ always() }} @@ -99,7 +74,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 0bbcd30f..95b6b6af 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@v2 + uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@v2 + uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/release-announcements.yml b/.github/workflows/release-announcements.yml deleted file mode 100644 index 6ad33927..00000000 --- a/.github/workflows/release-announcements.yml +++ /dev/null @@ -1,68 +0,0 @@ -name: release-announcements -# Automatic release toot and tweet anouncements -on: - release: - types: [published] - workflow_dispatch: - -jobs: - toot: - runs-on: ubuntu-latest - steps: - - uses: rzr/fediverse-action@master - with: - access-token: ${{ secrets.MASTODON_ACCESS_TOKEN }} - host: "mstdn.science" # custom host if not "mastodon.social" (default) - # GitHub event payload - # 
https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#release - message: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - - send-tweet: - runs-on: ubuntu-latest - - steps: - - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - name: Install dependencies - run: pip install tweepy==4.14.0 - - name: Send tweet - shell: python - run: | - import os - import tweepy - - client = tweepy.Client( - access_token=os.getenv("TWITTER_ACCESS_TOKEN"), - access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), - consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), - consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"), - ) - tweet = os.getenv("TWEET") - client.create_tweet(text=tweet) - env: - TWEET: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - TWITTER_CONSUMER_KEY: ${{ secrets.TWITTER_CONSUMER_KEY }} - TWITTER_CONSUMER_SECRET: ${{ secrets.TWITTER_CONSUMER_SECRET }} - TWITTER_ACCESS_TOKEN: ${{ secrets.TWITTER_ACCESS_TOKEN }} - TWITTER_ACCESS_TOKEN_SECRET: ${{ secrets.TWITTER_ACCESS_TOKEN_SECRET }} - - bsky-post: - runs-on: ubuntu-latest - steps: - - uses: zentered/bluesky-post-action@v0.0.2 - with: - post: | - Pipeline release! ${{ github.repository }} v${{ github.event.release.tag_name }} - ${{ github.event.release.name }}! - - Please see the changelog: ${{ github.event.release.html_url }} - env: - BSKY_IDENTIFIER: ${{ secrets.BSKY_IDENTIFIER }} - BSKY_PASSWORD: ${{ secrets.BSKY_PASSWORD }} - # diff --git a/.github/workflows/sanger_test.yml b/.github/workflows/sanger_test.yml index cc2f2a19..00e0c2fb 100644 --- a/.github/workflows/sanger_test.yml +++ b/.github/workflows/sanger_test.yml @@ -26,7 +26,7 @@ jobs: "use_work_dir_as_temp": true, } profiles: test,sanger,singularity,cleanup - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/sanger_test_full.yml b/.github/workflows/sanger_test_full.yml index b44c29f4..f67779ef 100644 --- a/.github/workflows/sanger_test_full.yml +++ b/.github/workflows/sanger_test_full.yml @@ -26,7 +26,7 @@ jobs: with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} - compute_env: ${{ secrets.TOWER_COMPUTE_ENV_LARGE }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} revision: ${{ env.REVISION }} workdir: ${{ secrets.TOWER_WORKDIR_PARENT }}/work/${{ github.repository }}/work-${{ env.REVISION }} parameters: | @@ -34,7 +34,7 @@ jobs: "outdir": "${{ secrets.TOWER_WORKDIR_PARENT }}/results/${{ github.repository }}/results-${{ env.REVISION }}", } profiles: test_full,sanger,singularity,cleanup - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: Tower debug log file path: | diff --git a/.github/workflows/template_version_comment.yml b/.github/workflows/template_version_comment.yml new file mode 100644 index 00000000..537529bc --- /dev/null +++ b/.github/workflows/template_version_comment.yml @@ -0,0 +1,46 @@ +name: nf-core template version comment +# This workflow is triggered on PRs to check if the pipeline template version matches the latest nf-core version. +# It posts a comment to the PR, even if it comes from a fork. 
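[Editor's aside — for orientation, the check this new workflow performs could be approximated locally with the sketch below. It assumes `yq` and `pip` are on the PATH; the workflow itself uses the `minimal-read-yaml` action and `pip list --outdated` instead.]

```bash
# Read the pinned template version from .nf-core.yml (requires yq).
template_version=$(yq '.nf_core_version' .nf-core.yml)

# If pip reports nf-core as outdated, the pinned template lags the latest release.
if pip list --outdated 2>/dev/null | grep -q '^nf-core '; then
  echo "WARNING: template version ${template_version} is behind the latest nf-core/tools"
fi
```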
+ +on: pull_request_target + +jobs: + template_version: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Read template version from .nf-core.yml + uses: nichmor/minimal-read-yaml@v0.0.2 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install nf-core + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Check nf-core outdated + id: nf_core_outdated + run: echo "OUTPUT=$(pip list --outdated | grep nf-core)" >> ${GITHUB_ENV} + + - name: Post nf-core template version comment + uses: mshick/add-pr-comment@b8f338c590a895d50bcbfa6c5859251edc8952fc # v2 + if: | + contains(env.OUTPUT, 'nf-core') + with: + repo-token: ${{ secrets.NF_CORE_BOT_AUTH_TOKEN }} + allow-repeats: false + message: | + > [!WARNING] + > Newer version of the nf-core template is available. + > + > Your pipeline is using an old version of the nf-core template: ${{ steps.read_yml.outputs['nf_core_version'] }}. + > Please update your pipeline to the latest version. + > + > For more documentation on how to update your pipeline, please see the [nf-core documentation](https://github.com/nf-core/tools?tab=readme-ov-file#sync-a-pipeline-with-the-template) and [Synchronisation documentation](https://nf-co.re/docs/contributing/sync). + # diff --git a/.gitignore b/.gitignore index 5124c9ac..a42ce016 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ results/ testing/ testing* *.pyc +null/ diff --git a/.gitpod.yml b/.gitpod.yml index acf72695..83599f63 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,18 +4,7 @@ tasks: command: | pre-commit install --install-hooks nextflow self-update - - name: unset JAVA_TOOL_OPTIONS - command: | - unset JAVA_TOOL_OPTIONS + vscode: - extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files - # - cssho.vscode-svgviewer # SVG viewer - - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors - # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code + extensions: + - nf-core.nf-core-extensionpack # https://github.com/nf-core/vscode-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index 85e18745..2a4ce6d7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -12,6 +12,9 @@ lint: - LICENCE - lib/NfcoreTemplate.groovy - CODE_OF_CONDUCT.md + - assets/sendmail_template.txt + - assets/email_template.html + - assets/email_template.txt - assets/nf-core-blobtoolkit_logo_light.png - docs/images/nf-core-blobtoolkit_logo_light.png - docs/images/nf-core-blobtoolkit_logo_dark.png @@ -19,18 +22,25 @@ lint: - .github/PULL_REQUEST_TEMPLATE.md - .github/workflows/linting.yml - .github/workflows/branch.yml + - .github/CONTRIBUTING.md + - .github/workflows/linting_comment.yml + merge_markers: false multiqc_config: - report_comment nextflow_config: - manifest.name - 
manifest.homePage
-template_strings: False
-merge_markers: False
+template_strings: false
+nf_core_version: 3.2.0
repository_type: pipeline
template:
  author: priyanka-surana
  description: Quality assessment of genome assemblies
+  force: false
+  is_nfcore: false
  name: blobtoolkit
-  prefix: sanger-tol
-  skip:
+  org: sanger-tol
+  outdir: .
+  skip_features:
    - igenomes
+  version: 0.7.0
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0c31cdb9..1dec8650 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,5 +1,13 @@
repos:
  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: "v2.7.1"
+    rev: "v3.1.0"
    hooks:
      - id: prettier
+        additional_dependencies:
+          - prettier@3.2.5
+
+  - repo: https://github.com/editorconfig-checker/editorconfig-checker.python
+    rev: "3.1.2"
+    hooks:
+      - id: editorconfig-checker
+        alias: ec
diff --git a/.prettierignore b/.prettierignore
index 5bee2b1c..edd29f01 100644
--- a/.prettierignore
+++ b/.prettierignore
@@ -10,4 +10,4 @@ testing/
testing*
*.pyc
bin/
-assets/test*
+ro-crate-metadata.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..a33b527c
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+  "markdown.styles": ["public/vscode_markdown.css"]
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 050399ed..478f9562 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,43 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [[0.7.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.7.0)] – Psyduck – [2025-03-19]
+
+- Fetch information about the chromosomes of the assemblies. Used to power
+  "grid plots".
+- Fill in accurate read information in the blobDir. Users are now required
+  to indicate in the samplesheet whether the reads are paired or single.
+- Updated the Blastn settings to allow 7 days runtime at most, since that
+  covers 99.7% of the jobs.
+- Allow database inputs to be optionally compressed (`.tar.gz`)
+- Allow `BUSCO` run outputs to be optionally pre-computed and provided with `--precomputed_busco`
+
+The pipeline is now considered to be a complete and suitable replacement for the Snakemake version
+with the caveats that:
+
+- Resource requirements (CPU and memory) of the processes aren't optimal.
+- The Blastn step can take up to a week to run.
+
+### Software dependencies
+
+Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported.
+
+| Dependency  | Old version       | New version     |
+| ----------- | ----------------- | --------------- |
+| blast       | 2.14.1 and 2.15.0 | only 2.15.0     |
+| blobtoolkit | 4.3.9             | 4.4.4           |
+| busco       | 5.5.0             | 5.7.1           |
+| multiqc     | 1.20 and 1.21     | 1.20 and 1.25.1 |
+| samtools    | 1.18 and 1.19.2   | 1.20 and 1.21   |
+
+### Parameters
+
+| Old parameter | New parameter       |
+| ------------- | ------------------- |
+|               | --precomputed_busco |
+
+> **NB:** Parameter has been **updated** if both old and new parameter information is present. <br> **NB:** Parameter has been **added** if just the new parameter information is present. <br> **NB:** Parameter has been **removed** if new parameter information isn't present.
## [[0.6.0](https://github.com/sanger-tol/blobtoolkit/releases/tag/0.6.0)] – Bellsprout – [2024-09-13]

The pipeline has now been validated for draft (unpublished) assemblies.

@@ -87,13 +124,13 @@

The pipeline has now been validated on dozens of genomes, up to 11 Gbp.

Note, since the pipeline is using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. Only `Docker` or `Singularity` containers are supported, `conda` is not supported.

-| Dependency  | Old version   | New version   |
-| ----------- | ------------- | ------------- |
-| blobtoolkit | 4.3.3         | 4.3.9         |
-| blast       | 2.14.0        | 2.15.0        |
-| multiqc     | 1.17 and 1.18 | 1.20 and 1.21 |
-| samtools    | 1.18          | 1.19.2        |
-| seqtk       | 1.3           | 1.4           |
+| Dependency  | Old version   | New version       |
+| ----------- | ------------- | ----------------- |
+| blobtoolkit | 4.3.3         | 4.3.9             |
+| blast       | 2.14.0        | 2.15.0 and 2.14.1 |
+| multiqc     | 1.17 and 1.18 | 1.20 and 1.21     |
+| samtools    | 1.18          | 1.18 and 1.19.2   |
+| seqtk       | 1.3           | 1.4               |

> **NB:** Dependency has been **updated** if both old and new version information is present. <br> **NB:** Dependency has been **added** if just the new version information is present. <br> **NB:** Dependency has been **removed** if version information isn't present.
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..347499ad
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,48 @@
+cff-version: 1.2.0
+title: sanger-tol/blobtoolkit v0.7.0
+authors:
+  - family-names: Butt
+    given-names: Zaynab
+    affiliation: Wellcome Sanger Institute
+    orcid: 0009-0009-7934-8440
+  - family-names: Chafin
+    given-names: Tyler
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0001-8687-5905
+  - family-names: Challis
+    given-names: Rich
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0002-3502-1122
+  - family-names: Kumar
+    given-names: Sujai
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0001-5902-6641
+  - family-names: Muffato
+    given-names: Matthieu
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0002-7860-3560
+  - family-names: Pointon
+    given-names: Damon-Lee
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0003-2949-6719
+  - family-names: Qi
+    given-names: Guoying
+    orcid: 0000-0003-1262-8973
+    affiliation: Wellcome Sanger Institute
+  - family-names: "Ramos D\xEDaz"
+    given-names: Alexander
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0001-6410-3349
+  - family-names: Surana
+    given-names: Priyanka
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0002-7167-0875
+  - family-names: Yates
+    given-names: Bethan
+    affiliation: Wellcome Sanger Institute
+    orcid: 0000-0003-1658-1762
+doi: 10.5281/zenodo.13758882
+repository-code: "https://github.com/sanger-tol/blobtoolkit"
+license: MIT
+version: 0.7.0
+date-released: "2024-09-13"
diff --git a/CITATIONS.md b/CITATIONS.md
index 8b960779..1e18be72 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -12,6 +12,10 @@

## Pipeline tools

+- [BLAST+](https://blast.ncbi.nlm.nih.gov/doc/blast-help/downloadblastdata.html)
+
+  > Camacho, Christiam, et al. “BLAST+: architecture and applications.” BMC Bioinformatics, vol. 10, no. 421, Dec. 2009, https://doi.org/10.1186/1471-2105-10-421.
+
- [BlobToolKit](https://github.com/blobtoolkit/blobtoolkit)

  > Challis, Richard, et al. “BlobToolKit – Interactive Quality Assessment of Genome Assemblies.” G3 Genes|Genomes|Genetics, vol. 10, no. 4, Apr. 2020, pp. 1361–74, https://doi.org/10.1534/g3.119.400908.

@@ -26,9 +30,7 @@

- [Fasta_windows](https://github.com/tolkit/fasta_windows)

-- [GoaT](https://goat.genomehubs.org)
-
-  > Challis, Richard, et al. “Genomes on a Tree (GoaT): A versatile, scalable search engine for genomic and sequencing project metadata across the eukaryotic tree of life.” Wellcome Open Research, vol. 8, no. 24, 2023, https://doi.org/10.12688/wellcomeopenres.18658.1.
+  > Brown, Max, et al. "Fasta_windows v0.2.3". GitHub, 2021. https://github.com/tolkit/fasta_windows

- [Minimap2](https://github.com/lh3/minimap2)

@@ -42,11 +44,15 @@

  > Danecek, Petr, et al. “Twelve Years of SAMtools and BCFtools.” GigaScience, vol. 10, no. 2, Jan. 2021, https://doi.org/10.1093/gigascience/giab008.

+- [SeqTK](https://github.com/lh3/seqtk)
+
+  > Li, Heng. "SeqTK v1.4". GitHub, 2023, https://github.com/lh3/seqtk
+
## Software packaging/containerisation tools

-- [Anaconda](https://anaconda.com)
+- [Conda](https://conda.org/)

-  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
+  > conda contributors. conda: A system-level, binary package and environment manager running on all major operating systems and platforms. Computer software.
https://github.com/conda/conda - [Bioconda](https://bioconda.github.io) diff --git a/LICENSE b/LICENSE index dbbab5cd..1e3ca621 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2022-2023 Genome Research Ltd. +Copyright (c) 2022-2025 Genome Research Ltd. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index b55ab353..f635c40c 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,15 @@ # ![sanger-tol/blobtoolkit](docs/images/sanger-tol-blobtoolkit_logo.png) -[![GitHub Actions CI Status](https://github.com/sanger-tol/blobtoolkit/workflows/nf-core%20CI/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions?query=workflow%3A%22nf-core+CI%22) -[![GitHub Actions Linting Status](https://github.com/sanger-tol/blobtoolkit/workflows/nf-core%20linting/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions?query=workflow%3A%22nf-core+linting%22)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7949058-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7949058) +[![GitHub Actions CI Status](https://github.com/sanger-tol/blobtoolkit/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions/workflows/ci.yml) +[![GitHub Actions Linting Status](https://github.com/sanger-tol/blobtoolkit/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.7949058-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.7949058) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) + + +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=conda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) -[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/sanger-tol/blobtoolkit) +[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/blobtoolkit) ## Introduction @@ -20,8 +22,8 @@ It takes a samplesheet of BAM/CRAM/FASTQ/FASTA files as input, calculates genome 4. Run BUSCO ([`busco`](https://busco.ezlab.org/)) 5. Extract BUSCO genes ([`blobtoolkit/extractbuscos`](https://github.com/blobtoolkit/blobtoolkit)) 6. Run Diamond BLASTp against extracted BUSCO genes ([`diamond/blastp`](https://github.com/bbuchfink/diamond)) -7. Run BLASTx against sequences with no hit ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) -8. Run BLASTn against sequences still with not hit ([`blast/blastx`](https://www.ncbi.nlm.nih.gov/books/NBK131777/)) +7. Run BLASTx against sequences with no hit ([`diamond/blastx`](https://github.com/bbuchfink/diamond)) +8. 
Run BLASTn against sequences that still have no hit ([`blast/blastn`](https://www.ncbi.nlm.nih.gov/books/NBK131777/))
9. Count BUSCO genes ([`blobtoolkit/countbuscos`](https://github.com/blobtoolkit/blobtoolkit))
10. Generate combined sequence stats across various window sizes ([`blobtoolkit/windowstats`](https://github.com/blobtoolkit/blobtoolkit))
11. Imports analysis results into a BlobDir dataset ([`blobtoolkit/blobdir`](https://github.com/blobtoolkit/blobtoolkit))
@@ -37,13 +39,17 @@ First, prepare a samplesheet with your input data that looks as follows:

`samplesheet.csv`:

```csv
-sample,datatype,datafile
-mMelMel3,hic,GCA_922984935.2.hic.mMelMel3.cram
-mMelMel1,illumina,GCA_922984935.2.illumina.mMelMel1.cram
-mMelMel3,ont,GCA_922984935.2.ont.mMelMel3.cram
+sample,datatype,datafile,library_layout
+mMelMel3,hic,GCA_922984935.2.hic.mMelMel3.cram,PAIRED
+mMelMel1,illumina,GCA_922984935.2.illumina.mMelMel1.cram,PAIRED
+mMelMel3,ont,GCA_922984935.2.ont.mMelMel3.cram,SINGLE
```

-Each row represents an aligned file. Rows with the same sample identifier are considered technical replicates. The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (`ont`, `hic`, `pacbio`, `pacbio_clr`, `illumina`). The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline.
+Each row represents an aligned file.
+Rows with the same sample identifier are considered technical replicates.
+The datatype refers to the sequencing technology used to generate the underlying raw data and follows a controlled vocabulary (`ont`, `hic`, `pacbio`, `pacbio_clr`, `illumina`).
+The library layout indicates whether the reads are paired or single.
+The aligned read files can be generated using the [sanger-tol/readmapping](https://github.com/sanger-tol/readmapping) pipeline.

Now, you can run the pipeline using:

@@ -62,8 +68,7 @@ nextflow run sanger-tol/blobtoolkit \
```

> [!WARNING]
-> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_;
-> see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).

For more details, please refer to the [usage documentation](https://pipelines.tol.sanger.ac.uk/blobtoolkit/usage) and the [parameter documentation](https://pipelines.tol.sanger.ac.uk/blobtoolkit/parameters).

@@ -77,9 +82,8 @@ sanger-tol/blobtoolkit was written in Nextflow by [Alexander Ramos Diaz](https:/

We thank the following people for their assistance in the development of this pipeline:

-
- [Guoying Qi](https://github.com/gq1)
+- [Bethan Yates](https://github.com/BethYates)

## Contributions and Support

If you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).

## Citations

If you use sanger-tol/blobtoolkit for your analysis, please cite it using the following doi: [10.5281/zenodo.7949058](https://doi.org/10.5281/zenodo.7949058)

-
-
An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.
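[Editor's aside — returning to the warning above about `-params-file`: a minimal sketch of that route. Only `taxdump`, `busco` and `outdir` appear elsewhere in this diff as CLI flags; treating `input` as the samplesheet parameter and the paths used here are assumptions.]

```bash
# Hypothetical params file; keys mirror CLI flags seen elsewhere in this diff.
cat > params.yaml <<'EOF'
input: samplesheet.csv
taxdump: /path/to/ncbi_taxdump
busco: /path/to/busco_database
outdir: results
EOF

nextflow run sanger-tol/blobtoolkit -profile docker -params-file params.yaml
```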
-This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/master/LICENSE).
+This pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).

> **The nf-core framework for community-curated bioinformatics pipelines.**
>
diff --git a/assets/email_template.html b/assets/email_template.html
index add38111..85b1b17b 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -12,7 +12,7 @@
-sanger-tol/blobtoolkit v${version}
+sanger-tol/blobtoolkit ${version}
Run Name: $runName
<% if (!success){ diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 03f46915..d5c59938 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,8 +1,6 @@ report_comment: > - - This report has been generated by the sanger-tol/blobtoolkit + This report has been generated by the sanger-tol/blobtoolkit analysis pipeline. - report_section_order: "sanger-tol-blobtoolkit-methods-description": order: -1000 @@ -12,3 +10,5 @@ report_section_order: order: -1002 export_plots: true + +disable_version_detection: true diff --git a/assets/schema_fetchngs_input.json b/assets/schema_fetchngs_input.json new file mode 100644 index 00000000..654e69a8 --- /dev/null +++ b/assets/schema_fetchngs_input.json @@ -0,0 +1,99 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/schema_fetchngs_input.json", + "title": "sanger-tol/blobtoolkit pipeline - params.input schema for nf-core/fetchngs samplesheets", + "description": "Schema for the file provided with params.input if it originates from a run of the nf-core/fetchngs pipeline", + "type": "array", + "items": { + "type": "object", + "properties": { + "run_accession": { + "type": "string", + "description": "Accession number", + "pattern": "^\\S+$", + "errorMessage": "Accession number of the sequencing run", + "meta": ["run_accession"] + }, + "instrument_platform": { + "type": "string", + "pattern": "^\\S+$", + "enum": ["ILLUMINA", "OXFORD_NANOPORE", "PACBIO_SMRT"], + "errorMessage": "Sequencing platform used. Only 'ILLUMINA', 'OXFORD_NANOPORE', and 'PACBIO_SMRT', are supported", + "meta": ["instrument_platform"] + }, + "instrument_model": { + "type": "string", + "pattern": "^.+$", + "errorMessage": "Model of the sequencing instrument used", + "meta": ["instrument_model"] + }, + "library_strategy": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "The sequencing technique intended for the library", + "meta": ["library_strategy"] + }, + "library_layout": { + "type": "string", + "pattern": "^(SINGLE|PAIRED)$", + "errorMessage": "The only valid layouts are SINGLE and PAIRED", + "meta": ["library_layout"] + }, + "fastq_1": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "meta": ["fastq_1"] + }, + "fastq_2": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "meta": ["fastq_2"] + }, + "sample": {}, + "experiment_accession": {}, + "sample_accession": {}, + "secondary_sample_accession": {}, + "study_accession": {}, + "secondary_study_accession": {}, + "submission_accession": {}, + "run_alias": {}, + "experiment_alias": {}, + "sample_alias": {}, + "study_alias": {}, + "library_selection": {}, + "library_source": {}, + "library_name": {}, + "base_count": {}, + "read_count": {}, + "tax_id": {}, + "scientific_name": {}, + "sample_title": {}, + "experiment_title": {}, + "study_title": {}, + "sample_description": {}, + "fastq_md5": {}, + "fastq_bytes": {}, + "fastq_ftp": {}, + "fastq_galaxy": {}, + "fastq_aspera": {} + }, + "required": [ + "run_accession", + "instrument_platform", + "instrument_model", + "library_strategy", + "library_layout", + "fastq_1" + ], + "dependentRequired": 
{ + "fastq_2": ["fastq_1"] + } + }, + "uniqueEntries": ["run_accession"] +} diff --git a/assets/schema_input.json b/assets/schema_input.json index 26ed41cb..db9d05c9 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -1,6 +1,6 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/sanger-tol/blobtoolkit/master/assets/schema_input.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/schema_input.json", "title": "sanger-tol/blobtoolkit pipeline - params.input schema", "description": "Schema for the file provided with params.input", "type": "array", @@ -11,20 +11,31 @@ "type": "string", "description": "Sample Name", "pattern": "^\\S+$", - "errorMessage": "Sample name must be provided and cannot contain spaces" + "errorMessage": "Sample name must be provided, be unique, and cannot contain spaces", + "meta": ["id"] }, "datatype": { "type": "string", "pattern": "^\\S+$", "enum": ["hic", "illumina", "ont", "pacbio", "pacbio_clr"], - "errorMessage": "Data type, and must be one of: 'hic' or 'illumina' or 'ont' or 'pacbio'" + "errorMessage": "Data type, and must be one of: 'hic' or 'illumina' or 'ont' or 'pacbio'", + "meta": ["datatype"] }, "datafile": { "type": "string", + "format": "file-path", + "exists": true, "pattern": "^\\S+\\.(bam|cram|fa|fa.gz|fasta|fasta.gz|fq|fq.gz|fastq|fastq.gz)$", "errorMessage": "Data file for reads cannot contain spaces and must be BAM/CRAM/FASTQ/FASTA" + }, + "library_layout": { + "type": "string", + "pattern": "^(SINGLE|PAIRED)$", + "errorMessage": "The only valid layouts are SINGLE and PAIRED", + "meta": ["layout"] } }, "required": ["datafile", "datatype", "sample"] - } + }, + "uniqueEntries": ["sample"] } diff --git a/assets/test/GCA_922984935.2.yaml b/assets/test/GCA_922984935.2.yaml deleted file mode 100644 index 4911d14a..00000000 --- a/assets/test/GCA_922984935.2.yaml +++ /dev/null @@ -1,79 +0,0 @@ -assembly: - accession: GCA_922984935.2 - alias: mMelMel3.2 paternal haplotype - bioproject: PRJEB49353 - biosample: SAMEA7524400 - file: ./GCA_922984935.2/assembly/GCA_922984935.2.fasta.gz - level: chromosome - prefix: CAKLPM02 - scaffold-count: 538 - span: 2738694574 - url: ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/922/984/935/GCA_922984935.2_mMelMel3.2_paternal_haplotype/GCA_922984935.2_mMelMel3.2_paternal_haplotype_genomic.fna.gz -busco: - basal_lineages: - - eukaryota_odb10 - - bacteria_odb10 - - archaea_odb10 - download_dir: ./busco - lineages: - - carnivora_odb10 - - laurasiatheria_odb10 - - eutheria_odb10 - - mammalia_odb10 - - tetrapoda_odb10 - - vertebrata_odb10 - - metazoa_odb10 - - eukaryota_odb10 - - bacteria_odb10 - - archaea_odb10 -fields: - categories: - file: ./GCA_922984935.2/assembly/GCA_922984935.2.categories.tsv - synonyms: - file: ./GCA_922984935.2/assembly/GCA_922984935.2.synonyms.tsv - prefix: insdc -reads: - paired: [] - single: [] -revision: 0 -settings: - blast_chunk: 100000 - blast_max_chunks: 10 - blast_min_length: 1000 - blast_overlap: 0 - stats_chunk: 1000 - stats_windows: - - 0.1 - - 0.01 - - 100000 - - 1000000 - taxdump: ./taxdump - tmp: /tmp -similarity: - blastn: - name: nt - path: ./nt - defaults: - evalue: 1.0e-10 - import_evalue: 1.0e-25 - max_target_seqs: 10 - taxrule: buscogenes - diamond_blastp: - import_max_target_seqs: 100000 - name: reference_proteomes - path: ./uniprot - taxrule: blastp=buscogenes - diamond_blastx: - name: reference_proteomes - path: 
./uniprot -taxon: - class: Mammalia - family: Mustelidae - genus: Meles - kingdom: Metazoa - name: Meles meles - order: Carnivora - phylum: Chordata - superkingdom: Eukaryota - taxid: '9662' -version: 1 diff --git a/assets/test/mMelMel3.1.buscogenes.dmnd b/assets/test/mMelMel3.1.buscogenes.dmnd deleted file mode 100644 index 391345ba..00000000 Binary files a/assets/test/mMelMel3.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test/mMelMel3.1.buscoregions.dmnd b/assets/test/mMelMel3.1.buscoregions.dmnd deleted file mode 100644 index 91fa6042..00000000 Binary files a/assets/test/mMelMel3.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb deleted file mode 100644 index 18062436..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ndb and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr deleted file mode 100644 index 0b5d4906..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nhr and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin deleted file mode 100644 index bebd568b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nin and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog deleted file mode 100644 index e6ef79c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nog and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos deleted file mode 100644 index 99700566..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nos and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not deleted file mode 100644 index 047e8d38..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.not and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq deleted file mode 100644 index 48497573..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nsq and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf deleted file mode 100644 index 3be5ea5b..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.ntf and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto b/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto deleted file mode 100644 index 6d4a41c7..00000000 Binary files a/assets/test/nt_mMelMel3.1/nt_mMelMel3.1.nto and /dev/null differ diff --git a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 b/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 deleted file mode 100644 index dc933c1f..00000000 Binary files a/assets/test/nt_mMelMel3.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/assets/test/samplesheet.csv b/assets/test/samplesheet.csv index 5f8d4463..2431b0e0 100644 --- a/assets/test/samplesheet.csv +++ b/assets/test/samplesheet.csv @@ -1,5 +1,5 @@ -sample,datatype,datafile -mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram 
-mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram -mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram -mMelMel3,ont,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram +sample,datatype,datafile,library_layout +mMelMel3_hic,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram,PAIRED +mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram,PAIRED +mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram,PAIRED +mMelMel3_ont,ont,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram,SINGLE diff --git a/assets/test/samplesheet_raw.csv b/assets/test/samplesheet_raw.csv index 830753a7..53a5a42e 100644 --- a/assets/test/samplesheet_raw.csv +++ b/assets/test/samplesheet_raw.csv @@ -1,4 +1,4 @@ -sample,datatype,datafile -mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3#1_subset.cram -mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4#1_subset.cram -mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2#1_subset.cram +sample,datatype,datafile,library_layout +mMelMel1,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel1/illumina/31231_3#1_subset.cram,PAIRED +mMelMel2,illumina,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel2/illumina/31231_4#1_subset.cram,PAIRED +mMelMel3,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Meles_meles/genomic_data/mMelMel3/hic-arima2/35528_2#1_subset.cram,PAIRED diff --git a/assets/test/samplesheet_s3.csv b/assets/test/samplesheet_s3.csv index dbb34181..94055d56 100644 --- a/assets/test/samplesheet_s3.csv +++ b/assets/test/samplesheet_s3.csv @@ -1,5 +1,5 @@ -sample,datatype,datafile -mMelMel3,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram -mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram -mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram -mMelMel3,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram +sample,datatype,datafile,library_layout 
+mMelMel3_hic,hic,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/hic/GCA_922984935.2.subset.unmasked.hic.mMelMel3.cram,PAIRED +mMelMel1,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel1.cram,PAIRED +mMelMel2,illumina,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/illumina/GCA_922984935.2.subset.unmasked.illumina.mMelMel2.cram,PAIRED +mMelMel3_ont,ont,https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/analysis/mMelMel3.2_paternal_haplotype/read_mapping/ont/GCA_922984935.2.subset.unmasked.ont.mMelMel3.cram,SINGLE diff --git a/assets/test_full/full_samplesheet.csv b/assets/test_full/full_samplesheet.csv index 6a3ba69d..fb673840 100644 --- a/assets/test_full/full_samplesheet.csv +++ b/assets/test_full/full_samplesheet.csv @@ -1,3 +1,3 @@ -sample,datatype,datafile -gfLaeSulp1,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/hic/GCA_927399515.1.unmasked.hic.gfLaeSulp1.cram -gfLaeSulp1,pacbio,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/pacbio/GCA_927399515.1.unmasked.pacbio.gfLaeSulp1.cram +sample,datatype,datafile,library_layout +gfLaeSulp1_hic,hic,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/hic/GCA_927399515.1.unmasked.hic.gfLaeSulp1.cram,PAIRED +gfLaeSulp1_pacbio,pacbio,/lustre/scratch123/tol/resources/nextflow/test-data/Laetiporus_sulphureus/analysis/gfLaeSulp1.1/read_mapping/pacbio/GCA_927399515.1.unmasked.pacbio.gfLaeSulp1.cram,SINGLE diff --git a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd b/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd deleted file mode 100644 index a0d0e1d2..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd and /dev/null differ diff --git a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd b/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd deleted file mode 100644 index 3f2a1a54..00000000 Binary files a/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb deleted file mode 100644 index 0905629a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ndb and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr deleted file mode 100644 index 1fa3521a..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nhr and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin deleted file mode 100644 index 0503c4c7..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nin and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog deleted file mode 100644 index 7dcd60eb..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nog and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos deleted file mode 100644 index 6bd1dcdf..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nos 
and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not deleted file mode 100644 index 8bacddec..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.not and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq deleted file mode 100644 index 6afe38e9..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nsq and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf deleted file mode 100644 index efd34086..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.ntf and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto b/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto deleted file mode 100644 index 4b140ec3..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/nt_gfLaeSulp1.1.nto and /dev/null differ diff --git a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 b/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 deleted file mode 100644 index 2a56a82f..00000000 Binary files a/assets/test_full/nt_gfLaeSulp1.1/taxonomy4blast.sqlite3 and /dev/null differ diff --git a/bin/check_fetchngs_samplesheet.py b/bin/check_fetchngs_samplesheet.py deleted file mode 100755 index 324811c9..00000000 --- a/bin/check_fetchngs_samplesheet.py +++ /dev/null @@ -1,252 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = (".fastq.gz",) - - def __init__( - self, - accession_col="run_accession", - model_col="instrument_model", - platform_col="instrument_platform", - library_col="library_strategy", - file1_col="fastq_1", - file2_col="fastq_2", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - accession_col (str): The name of the column that contains the accession name - (default "run_accession"). - model_col (str): The name of the column that contains the model name - of the instrument (default "instrument_model"). - platform_col (str): The name of the column that contains the platform name - of the instrument (default "instrument_platform"). - library_col (str): The name of the column that contains the strategy of the - preparation of the library (default "library_strategy"). - file2_col (str): The name of the column that contains the second file path - for the paired-end read data (default "fastq_2"). - """ - super().__init__(**kwargs) - self._accession_col = accession_col - self._model_col = model_col - self._platform_col = platform_col - self._library_col = library_col - self._file1_col = file1_col - self._file2_col = file2_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). 
- - """ - self._validate_accession(row) - self._validate_file(row) - self._seen.add((row[self._accession_col], row[self._file1_col])) - self.modified.append(row) - - def _validate_accession(self, row): - """Assert that the run accession name exists.""" - if len(row[self._accession_col]) <= 0: - raise AssertionError("Run accession is required.") - - def _validate_file(self, row): - """Assert that the datafile is non-empty and has the right format.""" - if len(row[self._file1_col]) <= 0: - raise AssertionError("Data file is required.") - self._validate_data_format(row[self._file1_col]) - if row[self._file2_col]: - self._validate_data_format(row[self._file2_col]) - - def _validate_data_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The data file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_accessions(self): - """ - Assert that the combination of accession name and aligned filename is unique. - - In addition to the validation, also rename all accessions to have a suffix of _T{n}, where n is the - number of times the same accession exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of accession and file name must be unique.") - seen = Counter() - for row in self.modified: - accession = row[self._accession_col] - seen[accession] += 1 - row[self._accession_col] = f"{accession}_T{seen[accession]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by sanger-tol pipelines. - - Validate the general shape of the table, expected columns, and each row. Also add - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `blobtoolkit samplesheet`_:: - - sample,datatype,datafile - sample1,hic,/path/to/file1.cram - sample1,pacbio,/path/to/file2.cram - sample1,ont,/path/to/file3.cram - - .. _blobtoolkit samplesheet: - https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/test/samplesheet.csv - - """ - required_columns = { - "run_accession", - "instrument_model", - "instrument_platform", - "library_strategy", - "fastq_1", - "fastq_2", - } - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. 
- with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_accessions() - header = list(reader.fieldnames) - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - parser.add_argument( - "-v", - "--version", - action="version", - version="%(prog)s 1.0.0", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py deleted file mode 100755 index 6b5392bf..00000000 --- a/bin/check_samplesheet.py +++ /dev/null @@ -1,263 +0,0 @@ -#!/usr/bin/env python - - -"""Provide a command line tool to validate and transform tabular samplesheets.""" - - -import argparse -import csv -import logging -import sys -from collections import Counter -from pathlib import Path - -logger = logging.getLogger() - - -class RowChecker: - """ - Define a service that can validate and transform each given row. - - Attributes: - modified (list): A list of dicts, where each dict corresponds to a previously - validated and transformed row. The order of rows is maintained. - - """ - - VALID_FORMATS = ( - ".cram", - ".bam", - ".fq", - ".fq.gz", - ".fastq", - ".fastq.gz", - ".fa", - ".fa.gz", - ".fasta", - ".fasta.gz", - ) - - VALID_DATATYPES = ( - "hic", - "illumina", - "pacbio", - "pacbio_clr", - "ont", - ) - - def __init__( - self, - sample_col="sample", - type_col="datatype", - file_col="datafile", - **kwargs, - ): - """ - Initialize the row checker with the expected column names. - - Args: - sample_col (str): The name of the column that contains the sample name - (default "sample"). 
- type_col (str): The name of the column that contains the dataype for - the read data (default "datatype"). - file_col (str): The name of the column that contains the file path for - the read data (default "datafile"). - """ - super().__init__(**kwargs) - self._sample_col = sample_col - self._type_col = type_col - self._file_col = file_col - self._seen = set() - self.modified = [] - - def validate_and_transform(self, row): - """ - Perform all validations on the given row. - - Args: - row (dict): A mapping from column headers (keys) to elements of that row - (values). - - """ - self._validate_sample(row) - self._validate_type(row) - self._validate_file(row) - self._seen.add((row[self._sample_col], row[self._file_col])) - self.modified.append(row) - - def _validate_sample(self, row): - """Assert that the sample name exists and convert spaces to underscores.""" - if len(row[self._sample_col]) <= 0: - raise AssertionError("Sample input is required.") - # Sanitize samples slightly. - row[self._sample_col] = row[self._sample_col].replace(" ", "_") - - def _validate_type(self, row): - """Assert that the data type matches expected values.""" - if not any(row[self._type_col] for datatype in self.VALID_DATATYPES): - raise AssertionError( - f"The datatype is unrecognized: {row[self._type_col]}\n" - f"It should be one of: {', '.join(self.VALID_DATATYPES)}" - ) - - def _validate_file(self, row): - """Assert that the datafile is non-empty and has the right format.""" - if len(row[self._file_col]) <= 0: - raise AssertionError("Data file is required.") - self._validate_data_format(row[self._file_col]) - - def _validate_data_format(self, filename): - """Assert that a given filename has one of the expected FASTQ extensions.""" - if not any(filename.endswith(extension) for extension in self.VALID_FORMATS): - raise AssertionError( - f"The data file has an unrecognized extension: {filename}\n" - f"It should be one of: {', '.join(self.VALID_FORMATS)}" - ) - - def validate_unique_samples(self): - """ - Assert that the combination of sample name and aligned filename is unique. - - In addition to the validation, also rename all samples to have a suffix of _T{n}, where n is the - number of times the same sample exist, but with different FASTQ files, e.g., multiple runs per experiment. - - """ - if len(self._seen) != len(self.modified): - raise AssertionError("The pair of sample and file name must be unique.") - seen = Counter() - for row in self.modified: - sample = row[self._sample_col] - seen[sample] += 1 - row[self._sample_col] = f"{sample}_T{seen[sample]}" - - -def read_head(handle, num_lines=10): - """Read the specified number of lines from the current position in the file.""" - lines = [] - for idx, line in enumerate(handle): - if idx == num_lines: - break - lines.append(line) - return "".join(lines) - - -def sniff_format(handle): - """ - Detect the tabular format. - - Args: - handle (text file): A handle to a `text file`_ object. The read position is - expected to be at the beginning (index 0). - - Returns: - csv.Dialect: The detected tabular format. - - .. _text file: - https://docs.python.org/3/glossary.html#term-text-file - - """ - peek = read_head(handle) - handle.seek(0) - sniffer = csv.Sniffer() - dialect = sniffer.sniff(peek) - return dialect - - -def check_samplesheet(file_in, file_out): - """ - Check that the tabular samplesheet has the structure expected by sanger-tol pipelines. - - Validate the general shape of the table, expected columns, and each row. 
Also add - Args: - file_in (pathlib.Path): The given tabular samplesheet. The format can be either - CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. - file_out (pathlib.Path): Where the validated and transformed samplesheet should - be created; always in CSV format. - - Example: - This function checks that the samplesheet follows the following structure, - see also the `blobtoolkit samplesheet`_:: - - sample,datatype,datafile - sample1,hic,/path/to/file1.cram - sample1,pacbio,/path/to/file2.cram - sample1,ont,/path/to/file3.cram - - .. _blobtoolkit samplesheet: - https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/assets/test/samplesheet.csv - - """ - required_columns = {"sample", "datatype", "datafile"} - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_in.open(newline="") as in_handle: - reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) - # Validate the existence of the expected header columns. - if not required_columns.issubset(reader.fieldnames): - req_cols = ", ".join(required_columns) - logger.critical(f"The sample sheet **must** contain these column headers: {req_cols}.") - sys.exit(1) - # Validate each row. - checker = RowChecker() - for i, row in enumerate(reader): - try: - checker.validate_and_transform(row) - except AssertionError as error: - logger.critical(f"{str(error)} On line {i + 2}.") - sys.exit(1) - checker.validate_unique_samples() - header = list(reader.fieldnames) - # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. - with file_out.open(mode="w", newline="") as out_handle: - writer = csv.DictWriter(out_handle, header, delimiter=",") - writer.writeheader() - for row in checker.modified: - writer.writerow(row) - - -def parse_args(argv=None): - """Define and immediately parse command line arguments.""" - parser = argparse.ArgumentParser( - description="Validate and transform a tabular samplesheet.", - epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", - ) - parser.add_argument( - "file_in", - metavar="FILE_IN", - type=Path, - help="Tabular input samplesheet in CSV or TSV format.", - ) - parser.add_argument( - "file_out", - metavar="FILE_OUT", - type=Path, - help="Transformed output samplesheet in CSV format.", - ) - parser.add_argument( - "-l", - "--log-level", - help="The desired log level (default WARNING).", - choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), - default="WARNING", - ) - parser.add_argument( - "-v", - "--version", - action="version", - version="%(prog)s 1.0.0", - ) - return parser.parse_args(argv) - - -def main(argv=None): - """Coordinate argument parsing and program execution.""" - args = parse_args(argv) - logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") - if not args.file_in.is_file(): - logger.error(f"The given input file {args.file_in} was not found!") - sys.exit(2) - args.file_out.parent.mkdir(parents=True, exist_ok=True) - check_samplesheet(args.file_in, args.file_out) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/bin/generate_config.py b/bin/generate_config.py index e3d00dfa..a5b51b12 100755 --- a/bin/generate_config.py +++ b/bin/generate_config.py @@ -2,17 +2,22 @@ import argparse import dataclasses +import urllib.parse +import urllib3.util import os import sys import sqlite3 -import requests import typing + +import requests +import requests.adapters import yaml -NCBI_TAXONOMY_API = 
"https://api.ncbi.nlm.nih.gov/datasets/v2alpha/taxonomy/taxon/%s" +NCBI_TAXONOMY_API = "https://api.ncbi.nlm.nih.gov/datasets/v2/taxonomy/taxon/%s" GOAT_LOOKUP_API = "https://goat.genomehubs.org/api/v2/lookup?searchTerm=%s&result=taxon&size=10&taxonomy=ncbi" GOAT_RECORD_API = "https://goat.genomehubs.org/api/v2/record?recordId=%s&result=taxon&size=10&taxonomy=ncbi" -NCBI_DATASETS_API = "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/%s/dataset_report?filters.assembly_version=all_assemblies" +NCBI_DATASETS_API = "https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/%s/dataset_report?filters.assembly_version=all_assemblies" +NCBI_SEQUENCE_API = "https://api.ncbi.nlm.nih.gov/datasets/v2/genome/accession/%s/sequence_reports" RANKS = [ "genus", @@ -34,12 +39,20 @@ def parse_args(args=None): parser.add_argument("--fasta", required=True, help="Path to the Fasta file of the assembly.") parser.add_argument("--taxon_query", required=True, help="Query string/integer for this taxon.") parser.add_argument("--lineage_tax_ids", required=True, help="Mapping between BUSCO lineages and taxon IDs.") - parser.add_argument("--yml_out", required=True, help="Output YML file.") - parser.add_argument("--csv_out", required=True, help="Output CSV file.") + parser.add_argument("--output_prefix", required=True, help="Prefix to name the output files.") parser.add_argument("--accession", help="Accession number of the assembly (optional).", default=None) parser.add_argument("--busco", help="Requested BUSCO lineages.", default=None) - parser.add_argument("--blastn", required=True, help="Path to the NCBI Taxonomy database") - parser.add_argument("--version", action="version", version="%(prog)s 1.4") + parser.add_argument("--nt", required=True, help="Path to the NT database") + parser.add_argument("--read_id", action="append", help="ID of a read set") + parser.add_argument("--read_type", action="append", help="Type of a read set") + parser.add_argument("--read_layout", action="append", help="Layout of a read set") + parser.add_argument("--read_path", action="append", help="Path of a read set") + parser.add_argument("--blastp", help="Path to the blastp database", required=True) + parser.add_argument("--blastx", help="Path to the blastx database", required=True) + parser.add_argument("--blastn", help="Path to the blastn database", required=True) + parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True) + parser.add_argument("--precomputed_busco", action="append", help="Path to precomputed BUSCO outputs", required=False) + parser.add_argument("--version", action="version", version="%(prog)s 2.0") return parser.parse_args(args) @@ -83,9 +96,16 @@ def fetch_taxon_info_from_goat(taxon_name: typing.Union[str, int]) -> TaxonInfo: return make_taxon_info_from_goat(body) +# Using API, get the taxon_ids of the species and all parents def fetch_taxon_info_from_ncbi(taxon_name: typing.Union[str, int], with_lineage=True) -> typing.Optional[TaxonInfo]: - # Using API, get the taxon_ids of the species and all parents - response = requests.get(NCBI_TAXONOMY_API % taxon_name).json() + # "/" has to be double encoded, e.g. "Gymnodinium sp. 
CCAP1117/9" -> "Gymnodinium%20sp.%20CCAP1117%252F9" + url_safe_taxon_name = urllib.parse.quote(str(taxon_name).replace("/", "%2F")) + retry_strategy = urllib3.util.Retry(total=5, backoff_factor=0.1, status_forcelist=[429, 500, 502, 503, 504]) + adapter = requests.adapters.HTTPAdapter(max_retries=retry_strategy) + session = requests.Session() + session.mount("http://", adapter) + session.mount("https://", adapter) + response = session.get(NCBI_TAXONOMY_API % url_safe_taxon_name).json() if "taxonomy" in response["taxonomy_nodes"][0]: body = response["taxonomy_nodes"][0]["taxonomy"] if with_lineage: @@ -113,7 +133,12 @@ def get_classification(taxon_info: TaxonInfo) -> typing.Dict[str, str]: return {r: ancestors[r] for r in RANKS if r in ancestors} -def get_odb(taxon_info: TaxonInfo, lineage_tax_ids: str, requested_buscos: typing.Optional[str]) -> typing.List[str]: +def get_odb( + taxon_info: TaxonInfo, + lineage_tax_ids: str, + requested_buscos: typing.Optional[str], + pre_computed_buscos: typing.List[str], +) -> typing.List[str]: # Read the mapping between the BUSCO lineages and their taxon_id with open(lineage_tax_ids) as file_in: lineage_tax_ids_dict: typing.Dict[int, str] = {} @@ -121,12 +146,20 @@ def get_odb(taxon_info: TaxonInfo, lineage_tax_ids: str, requested_buscos: typin arr = line.split() lineage_tax_ids_dict[int(arr[0])] = arr[1] + "_odb10" - if requested_buscos: + valid_odbs = set(lineage_tax_ids_dict.values()) + + if pre_computed_buscos: + # Use pre-computed BUSCO lineages if available + odb_arr = pre_computed_buscos + for odb in odb_arr: + if odb not in valid_odbs: + print(f"Invalid pre-computed BUSCO lineage: {odb}", file=sys.stderr) + sys.exit(1) + elif requested_buscos: odb_arr = requested_buscos.split(",") - valid_odbs = set(lineage_tax_ids_dict.values()) for odb in odb_arr: if odb not in valid_odbs: - print(f"Invalid BUSCO lineage: {odb}", file=sys.stderr) + print(f"Invalid requested BUSCO lineage: {odb}", file=sys.stderr) sys.exit(1) else: # Do the intersection to find the ancestors that have a BUSCO lineage @@ -165,8 +198,23 @@ def get_assembly_info(accession: str) -> typing.Dict[str, typing.Union[str, int] return d -def adjust_taxon_id(blastn: str, taxon_info: TaxonInfo) -> int: - con = sqlite3.connect(os.path.join(blastn, "taxonomy4blast.sqlite3")) +def get_sequence_report(accession: str): + response = requests.get(NCBI_SEQUENCE_API % accession).json() + if not response["reports"]: + print(f"Assembly not found: {accession}", file=sys.stderr) + sys.exit(1) + sequence_report = response["reports"] + for rec in sequence_report: + if rec["role"] == "assembled-molecule": + rec["assembly_level"] = rec["assigned_molecule_location_type"] + else: + rec["assembly_level"] = "na" + rec["chr_name"] = "na" + return sequence_report + + +def adjust_taxon_id(nt: str, taxon_info: TaxonInfo) -> int: + con = sqlite3.connect(os.path.join(nt, "taxonomy4blast.sqlite3")) cur = con.cursor() for taxon_id in [taxon_info.taxon_id] + [anc_taxon_info.taxon_id for anc_taxon_info in taxon_info.lineage]: res = cur.execute("SELECT * FROM TaxidInfo WHERE taxid = ?", (taxon_id,)) @@ -174,49 +222,60 @@ def adjust_taxon_id(blastn: str, taxon_info: TaxonInfo) -> int: return taxon_id +def datatype_to_platform(s): + if s == "ont": + return "OXFORD_NANOPORE" + elif s.startswith("pacbio"): + return "PACBIO_SMRT" + elif s in ["hic", "illumina"]: + return "ILLUMINA" + else: + return "OTHER" + + def print_yaml( file_out, assembly_info: typing.Dict[str, typing.Union[str, int]], taxon_info: TaxonInfo, 
classification: typing.Dict[str, str], - odb_arr: typing.List[str], + reads, + blastp, + blastx, + blastn, + taxdump, ): data = { "assembly": assembly_info, - "busco": { - "basal_lineages": BUSCO_BASAL_LINEAGES, - # "download_dir": - "lineages": odb_arr + [lin for lin in BUSCO_BASAL_LINEAGES if lin not in odb_arr], + "reads": { + "paired": [], + "single": [], }, - # "reads": {}, "revision": 1, "settings": { - # Only settings.stats_windows is mandatory, everything else is superfluous "blast_chunk": 100000, "blast_max_chunks": 10, "blast_min_length": 1000, "blast_overlap": 0, "stats_chunk": 1000, "stats_windows": [0.1, 0.01, 100000, 1000000], - # "taxdump": , + "taxdump": taxdump, "tmp": "/tmp", }, "similarity": { - # Only the presence similarity.diamond_blastx seems mandatory, everything else is superfluous "blastn": { "name": "nt", - # "path": , + "path": blastn, }, "defaults": {"evalue": 1e-10, "import_evalue": 1e-25, "max_target_seqs": 10, "taxrule": "buscogenes"}, "diamond_blastp": { "import_max_target_seqs": 100000, "name": "reference_proteomes", - # "path": , + "path": blastp, "taxrule": "blastp=buscogenes", }, "diamond_blastx": { "name": "reference_proteomes", - # "path": , + "path": blastx, }, }, "taxon": { @@ -226,6 +285,16 @@ def print_yaml( }, "version": 2, } + + for id, datatype, layout, path in reads: + data["reads"][layout.lower()].append( + { + "prefix": id, + "file": path, + "platform": datatype_to_platform(datatype), + } + ) + out_dir = os.path.dirname(file_out) make_dir(out_dir) @@ -243,23 +312,68 @@ def print_csv(file_out, taxon_id: int, odb_arr: typing.List[str]): print("busco_lineage", odb_val, sep=",", file=fout) +def print_tsvs(output_prefix, sequence_report): + categories_tsv = f"{output_prefix}.categories.tsv" + synonyms_tsv = f"{output_prefix}.synonyms.tsv" + with open(categories_tsv, "w") as fhc: + with open(synonyms_tsv, "w") as fhs: + print("identifier", "assembly_role", "assembly_level", "assembly_unit", sep="\t", file=fhc) + print("identifier", "name", "assigned_name", "refseq_accession", sep="\t", file=fhs) + for rec in sequence_report: + print( + rec["genbank_accession"], + rec["role"], + rec["assembly_level"], + rec["assembly_unit"], + sep="\t", + file=fhc, + ) + print( + rec["genbank_accession"], + rec["sequence_name"], + rec["chr_name"], + rec.get("refseq_accession", "na"), + sep="\t", + file=fhs, + ) + + def main(args=None): args = parse_args(args) assembly_info: typing.Dict[str, typing.Union[str, int]] if args.accession: assembly_info = get_assembly_info(args.accession) + sequence_report = get_sequence_report(args.accession) else: assembly_info = {"level": "scaffold"} + sequence_report = None assembly_info["file"] = args.fasta taxon_info = fetch_taxon_info(args.taxon_query) classification = get_classification(taxon_info) - odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco) - taxon_id = adjust_taxon_id(args.blastn, taxon_info) - print_yaml(args.yml_out, assembly_info, taxon_info, classification, odb_arr) - print_csv(args.csv_out, taxon_id, odb_arr) + precomputed_busco = [os.path.basename(path).replace("run_", "") for path in (args.precomputed_busco or [])] + odb_arr = get_odb(taxon_info, args.lineage_tax_ids, args.busco, precomputed_busco) + taxon_id = adjust_taxon_id(args.nt, taxon_info) + + if sequence_report: + print_tsvs(args.output_prefix, sequence_report) + + reads = zip(args.read_id, args.read_type, args.read_layout, args.read_path) + + print_yaml( + f"{args.output_prefix}.yaml", + assembly_info, + taxon_info, + classification,
reads, + args.blastp, + args.blastx, + args.blastn, + args.taxdump, + ) + print_csv(f"{args.output_prefix}.csv", taxon_id, odb_arr) if __name__ == "__main__": diff --git a/bin/jsonify_taxdump.py b/bin/jsonify_taxdump.py new file mode 100755 index 00000000..b0f82675 --- /dev/null +++ b/bin/jsonify_taxdump.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 + +import argparse +import sys + +from blobtools.lib.file_io import write_file +from blobtools.lib.taxdump import Taxdump + + +def parse_args(args=None): + Description = "Parse and digest the taxdump files into a JSON structure, printed on stdout." + + parser = argparse.ArgumentParser(description=Description) + parser.add_argument("taxdump", help="Path to the taxonomy database") + parser.add_argument("--version", action="version", version="%(prog)s 1.0") + return parser.parse_args(args) + + +def main(args=None): + args = parse_args(args) + + taxdump = Taxdump(args.taxdump) + write_file("STDOUT", taxdump.values_to_dict()) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/update_versions.py b/bin/update_versions.py index 1d3491a3..21f379a5 100755 --- a/bin/update_versions.py +++ b/bin/update_versions.py @@ -15,28 +15,10 @@ def parse_args(args=None): parser.add_argument("--meta_in", help="Input JSON file.", required=True) parser.add_argument("--meta_out", help="Output JSON file.", required=True) parser.add_argument("--software", help="Input YAML file.", required=True) - parser.add_argument("--read_id", action="append", help="ID of a read set") - parser.add_argument("--read_type", action="append", help="Type of a read set") - parser.add_argument("--read_path", action="append", help="Path of a read set") - parser.add_argument("--blastp", help="Path to the blastp database", required=True) - parser.add_argument("--blastx", help="Path to the blastx database", required=True) - parser.add_argument("--blastn", help="Path to the blastn database", required=True) - parser.add_argument("--taxdump", help="Path to the taxonomy database", required=True) - parser.add_argument("--version", action="version", version="%(prog)s 1.2.0") + parser.add_argument("--version", action="version", version="%(prog)s 1.3.0") return parser.parse_args(args) -def datatype_to_platform(s): - if s == "ont": - return "OXFORD_NANOPORE" - elif s.startswith("pacbio"): - return "PACBIO_SMRT" - elif s in ["hic", "illumina"]: - return "ILLUMINA" - else: - return "OTHER" - - def update_meta(args): with open(args.meta_in) as fh: infile = json.load(fh) @@ -54,20 +36,6 @@ def update_meta(args): del new_dict["sanger-tol/blobtoolkit"] infile["settings"]["software_versions"] = new_dict - infile["settings"]["taxdump"] = args.taxdump - for k in ["blastn", "diamond_blastp", "diamond_blastx"]: - infile["similarity"].setdefault(k, {}) - infile["similarity"]["blastn"]["path"] = args.blastn - infile["similarity"]["diamond_blastp"]["path"] = args.blastp - infile["similarity"]["diamond_blastx"]["path"] = args.blastx - - infile["reads"] = {} - for id, datatype, path in zip(args.read_id, args.read_type, args.read_path): - infile["reads"][id] = { - "file": path, - "plaform": datatype_to_platform(datatype), - } - return infile diff --git a/conf/base.config b/conf/base.config index 75fa0d06..6d04ed63 100644 --- a/conf/base.config +++ b/conf/base.config @@ -11,112 +11,109 @@ process { // Check the defaults for all processes - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 * 
task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 5 maxErrors = '-1' // Process-specific resource requirements - // NOTE - Please try and re-use the labels below as much as possible. + // NOTE - Please try and reuse the labels below as much as possible. // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. // If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. // Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = 1 + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB * task.attempt } } withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_CCS' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } - memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } - time = { check_max( 4.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + memory = { 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt } + time = { 4.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt } } // Extrapolated from the HIFI settings on the basis of 1 ONT alignment. 
CLR assumed to behave the same way as ONT withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(CLR|ONT)' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } - memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 30.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } - time = { check_max( 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + memory = { 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 30.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt } + time = { 1.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt } } - // Temporarily the same settings as CCS + // Based on a handful of runs withName: '.*:MINIMAP2_ALIGNMENT:MINIMAP2_(HIC|ILMN)' { cpus = { log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) } - memory = { check_max( 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt, 'memory' ) } - time = { check_max( 3.h * Math.ceil( meta.read_count / 1000000 ) * task.attempt, 'time' ) } + memory = { 800.MB * log_increase_cpus(4, 2*task.attempt, meta.read_count/1000000, 2) + 14.GB * Math.ceil( Math.pow(meta2.genome_size / 1000000000, 0.6)) * task.attempt } + time = { 1.h * Math.ceil( meta.read_count / 75000000 ) * task.attempt } } withName: 'WINDOWSTATS_INPUT' { - cpus = { check_max( 1 , 'cpus' ) } + cpus = 1 // 2 GB per 1 Gbp - memory = { check_max( 2.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + memory = { 2.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000) } + time = { 4.h * task.attempt } } withName: 'BLOBTOOLKIT_WINDOWSTATS' { - cpus = { check_max( 1 , 'cpus' ) } + cpus = 1 // 3 GB per 1 Gbp - memory = { check_max( 3.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000), 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + memory = { 3.GB * task.attempt * Math.ceil(meta.genome_size / 1000000000) } + time = { 4.h * task.attempt } } withName: 'FASTAWINDOWS' { // 1 CPU per 1 Gbp - cpus = { check_max( Math.ceil(meta.genome_size / 1000000000), 'cpus' ) } + cpus = { Math.ceil(meta.genome_size / 1000000000) } // 100 MB per 45 Mbp - memory = { check_max( 100.MB * task.attempt * Math.ceil(meta.genome_size / 45000000), 'memory' ) } + memory = { 100.MB * task.attempt * Math.ceil(meta.genome_size / 45000000) } } - withName: BUSCO { + withName: BUSCO_BUSCO { // The formulas below are equivalent to these ranges: // Gbp: [ 1, 2, 4, 8, 16] // CPUs: [ 8, 12, 16, 20, 24] // GB RAM: [16, 32, 64, 128, 256] - memory = { check_max( 1.GB * Math.pow(2, 3 + task.attempt + Math.ceil(positive_log(meta.genome_size/1000000000, 2))) , 'memory' ) } + memory = { 1.GB * Math.pow(2, 3 + task.attempt + Math.ceil(positive_log(meta.genome_size/1000000000, 2))) } cpus = { log_increase_cpus(4, 4*task.attempt, Math.ceil(meta.genome_size/1000000000), 2) } - time = { check_max( 3.h * Math.ceil(meta.genome_size/1000000000) * task.attempt, 'time') } + time = { 3.h * Math.ceil(meta.genome_size/1000000000) * task.attempt } } withName: "BLAST_BLASTN" { - // There are blast failures we don't know how to fix. Just ignore for now - errorStrategy = { task.exitStatus in ((130..145) + 104) ? (task.attempt == process.maxRetries ? 'ignore' : 'retry') : 'finish' } + // There are blast failures we don't know how to fix.
We just give up after 3 attempts + errorStrategy = { task.exitStatus in ((130..145) + 104) ? (task.attempt == 3 ? 'ignore' : 'retry') : 'finish' } - // Most jobs complete quickly but some need a lot longer. For those outliers, - // the CPU usage remains usually low, often nearing a single CPU - cpus = { check_max( 6 - (task.attempt-1), 'cpus' ) } - memory = { check_max( 1.GB * Math.pow(4, task.attempt-1), 'memory' ) } - time = { check_max( 10.h * Math.pow(4, task.attempt-1), 'time' ) } - } - withName:CUSTOM_DUMPSOFTWAREVERSIONS { - cache = false + // Most jobs complete quickly but some need a lot longer. For those outliers, + // the CPU usage usually remains low, averaging a single CPU + cpus = { task.attempt == 1 ? 4 : 1 } + memory = 2.GB + time = { task.attempt == 1 ? 4.h : ( task.attempt == 2 ? 47.h : 167.h ) } } } diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config new file mode 100644 index 00000000..03cc375b --- /dev/null +++ b/conf/igenomes_ignored.config @@ -0,0 +1,9 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Empty genomes dictionary to use when igenomes is ignored. +---------------------------------------------------------------------------------------- +*/ + +params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config index 1193cf5f..3bb93339 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -12,20 +12,22 @@ process { - withName: "SAMPLESHEET_CHECK" { + withName: "WINDOWMASKER_MKCOUNTS" { + ext.args = "-infmt fasta -sformat obinary" publishDir = [ - path: { "${params.outdir}/pipeline_info/blobtoolkit" }, + path: { "${params.outdir}/repeats/windowmasker" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + saveAs: { filename -> filename.equals("versions.yml") ? null : filename.substring(0, filename.length() - 3) + "obinary" } ] } - withName: "WINDOWMASKER_MKCOUNTS" { - ext.args = "-infmt fasta -sformat obinary" - } - withName: "WINDOWMASKER_USTAT" { ext.args = "-infmt fasta -dust T -outfmt fasta" + publishDir = [ + path: { "${params.outdir}/repeats/windowmasker" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } + ] } withName: "MINIMAP2_HIC" { @@ -80,14 +82,14 @@ ] } - withName: "BUSCO" { + withName: "BUSCO_BUSCO" { // Obey "use_work_dir_as_temp", except for large genomes scratch = { !params.use_work_dir_as_temp || (meta.genome_size < 2000000000) } - ext.args = { 'test' in workflow.profile.tokenize(',') ? + ext.args = { 'test' in workflow.profile.tokenize(',') || 'test_raw' in workflow.profile.tokenize(',') || 'test_nobusco' in workflow.profile.tokenize(',') ? // Additional configuration to speed processes up during testing.
// Note: BUSCO *must* see the double-quotes around the parameters - '--force --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' - : '--force' } + '--force --metaeuk --metaeuk_parameters \'"-s=2"\' --metaeuk_rerun_parameters \'"-s=2"\'' + : '--force --metaeuk' } } withName: "RESTRUCTUREBUSCODIR" { @@ -138,25 +140,25 @@ process { ext.args = "-task megablast -outfmt '6 qseqid staxids bitscore std' -max_target_seqs 10 -max_hsps 1 -evalue 1.0e-10 -lcase_masking -dust '20 64 1'" } - withName: "CUSTOM_DUMPSOFTWAREVERSIONS" { + withName: "COMPRESSBLOBDIR" { publishDir = [ - path: { "${params.outdir}/pipeline_info/blobtoolkit" }, + path: { "${params.outdir}/blobtoolkit" }, mode: params.publish_dir_mode, - pattern: "*_versions.yml" + saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] } - withName: "COMPRESSBLOBDIR" { + withName: "BLOBTK_IMAGES" { publishDir = [ - path: { "${params.outdir}/blobtoolkit" }, + path: { "${params.outdir}/blobtoolkit/plots" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] } - withName: "BLOBTK_IMAGES" { + withName: "JSONIFY_TAXDUMP" { publishDir = [ - path: { "${params.outdir}/blobtoolkit/plots" }, + path: { "${params.outdir}/resources" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals("versions.yml") ? null : filename } ] diff --git a/conf/test.config b/conf/test.config index 8ad70cae..8878bf57 100644 --- a/conf/test.config +++ b/conf/test.config @@ -10,31 +10,35 @@ ---------------------------------------------------------------------------------------- */ +process { + resourceLimits = [ + cpus: 2, + memory: '6.GB', + time: '6.h' + ] +} + params { config_profile_name = 'Test profile' config_profile_description = 'Minimal aligned test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - // Input test data // Specify the paths to your test data // Give any required params for the test so that command line flags are not needed input = "${projectDir}/assets/test/samplesheet_s3.csv" // Fasta references - fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.fasta.gz" + fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.phiXspike.fasta.gz" accession = "GCA_922984935.2" taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1" + // taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + taxdump = "https://tolit.cog.sanger.ac.uk/test-data/resources/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling 
/tmp use_work_dir_as_temp = true diff --git a/conf/test_full.config b/conf/test_full.config index ca78130e..a86e0050 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -25,11 +25,11 @@ params { taxon = "Laetiporus sulphureus" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" + taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" busco = "/lustre/scratch123/tol/resources/busco/latest" - blastp = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test_full/gfLaeSulp1.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test_full/nt_gfLaeSulp1.1" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/gfLaeSulp1.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Laetiporus_sulphureus/resources/nt_gfLaeSulp1.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/conf/test_nobusco.config b/conf/test_nobusco.config new file mode 100644 index 00000000..813e394e --- /dev/null +++ b/conf/test_nobusco.config @@ -0,0 +1,52 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests without BUSCO +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test, + using precomputed BUSCO results to skip running BUSCO within the pipeline. + + Use as follows: + nextflow run sanger-tol/blobtoolkit -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 2, + memory: '6.GB', + time: '6.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal aligned test dataset to check pipeline function' + + // Input test data + // Specify the paths to your test data + // Give any required params for the test so that command line flags are not needed + input = "${projectDir}/assets/test/samplesheet_s3.csv" + + // Fasta references + fasta = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/assembly/release/mMelMel3.1_paternal_haplotype/GCA_922984935.2.subset.phiXspike.fasta.gz" + accession = "GCA_922984935.2" + taxon = "Meles meles" + + // Databases + // taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + taxdump = "https://tolit.cog.sanger.ac.uk/test-data/resources/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" + + // Precomputed BUSCO outputs + // busco_output_noArchaea.tar.gz deliberately leaves out archaea_odb10 to test the pipeline's detection and filling of missing lineages + // Use *_busco_output.tar.gz for fully precomputed BUSCOs + //precomputed_busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output_noArchaea.tar.gz" + precomputed_busco = 
"https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/GCA_922984935.2_busco_output.tar.gz" + + // Need to be set to avoid overfilling /tmp + use_work_dir_as_temp = true +} diff --git a/conf/test_raw.config b/conf/test_raw.config index 0cf1d16f..14325e44 100644 --- a/conf/test_raw.config +++ b/conf/test_raw.config @@ -10,15 +10,18 @@ ---------------------------------------------------------------------------------------- */ +process { + resourceLimits = [ + cpus: 2, + memory: '6.GB', + time: '6.h' + ] +} + params { config_profile_name = 'Raw test profile' config_profile_description = 'Minimal raw test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - // Input test data // Specify the paths to your test data // Give any required params for the test so that command line flags are not needed @@ -31,11 +34,12 @@ params { taxon = "Meles meles" // Databases - taxdump = "/lustre/scratch123/tol/resources/taxonomy/latest/new_taxdump" - busco = "/lustre/scratch123/tol/resources/nextflow/busco/blobtoolkit.GCA_922984935.2.2023-08-03" - blastp = "${projectDir}/assets/test/mMelMel3.1.buscogenes.dmnd" - blastx = "${projectDir}/assets/test/mMelMel3.1.buscoregions.dmnd" - blastn = "${projectDir}/assets/test/nt_mMelMel3.1/" + // taxdump = "https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz" + taxdump = "https://tolit.cog.sanger.ac.uk/test-data/resources/new_taxdump.tar.gz" + busco = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/blobtoolkit.GCA_922984935.2.2023-08-03.tar.gz" + blastp = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscogenes.dmnd.tar.gz" + blastx = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/mMelMel3.1.buscoregions.dmnd.tar.gz" + blastn = "https://tolit.cog.sanger.ac.uk/test-data/Meles_meles/resources/nt_mMelMel3.1.tar.gz" // Need to be set to avoid overfilling /tmp use_work_dir_as_temp = true diff --git a/docs/output.md b/docs/output.md index e3204a1d..c2ba0af4 100644 --- a/docs/output.md +++ b/docs/output.md @@ -2,7 +2,7 @@ ## Introduction -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +This document describes the output produced by the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
@@ -15,10 +15,11 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - [BlobDir](#blobdir) - Output files viewable on a [BlobToolKit viewer](https://github.com/blobtoolkit/blobtoolkit) - [Static plots](#static-plots) - Static versions of the BlobToolKit plots - [BUSCO](#busco) - BUSCO results +- [Repeat masking](#repeat-masking) - Masked repeats (optional) - [Read alignments](#read-alignments) - Aligned reads (optional) - [Read coverage](#read-coverage) - Read coverage tracks - [Base content](#base-content) - _k_-mer statistics (for k ≤ 4) -- [MultiQC](#multiqc) - Aggregate report describing results from the whole pipeline +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution ### BlobDir @@ -58,8 +59,8 @@ BUSCO results generated by the pipeline (all BUSCO lineages that match the claas - `busco/` - `/` - - `short_summary.json`: BUSCO scores for that lineage as a tab-separated file. - - `short_summary.tsv`: BUSCO scores for that lineage as JSON. + - `short_summary.json`: BUSCO scores for that lineage as JSON. + - `short_summary.tsv`: BUSCO scores for that lineage as a tab-separated file (not for pre-computed BUSCOs). - `short_summary.txt`: BUSCO scores for that lineage as formatted text. - `full_table.tsv`: Coordinates of the annotated BUSCO genes as a tab-separated file. - `missing_busco_list.tsv`: List of the BUSCO genes that could not be found. @@ -68,6 +69,20 @@ BUSCO results generated by the pipeline (all BUSCO lineages that match the claas +### Repeat masking + +Results from the repeat-masking step -- only if the pipeline is run with `--mask`. + +

+Output files + +- `repeats/` + - `windowmasker/` + - `.fasta`: masked assembly in Fasta format. + - `.obinary`: frequency counts of repeats, in windowmasker's own binary format. + +
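For orientation, here is a minimal sketch of a run that produces this `repeats/` directory (all values are placeholders for your own data; `--mask` is the only flag specific to repeat masking, and defaults are assumed otherwise):

```bash
# Hypothetical invocation: enables the windowmasker-based repeat-masking step,
# which publishes the repeats/ outputs described above.
# samplesheet.csv, genome.fasta, and the taxon are placeholders.
nextflow run sanger-tol/blobtoolkit \
    -profile docker \
    --input samplesheet.csv \
    --fasta genome.fasta \
    --taxon "Meles meles" \
    --mask \
    --outdir results
```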
+ ### Read alignments Read alignments in BAM format -- only if the pipeline is run with `--align`. @@ -123,7 +138,17 @@ Those files are the raw data used to build the BlobDir. [MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Some of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. -Results generated by MultiQC collate pipeline QC from supported tools. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +Results generated by MultiQC collate pipeline QC from supported tools, e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +### Resources / databases + +

+Output files + +- `resources/` + - `new_taxdump.json`: Input NCBI taxonomy digested into a JSON file. Can be used to speed up further runs of the pipeline. + +
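A hedged sketch of how this file can be reused: the usage documentation notes that the digested taxonomy can be fed back to the pipeline in place of the bare `new_taxdump` directory; passing it through the same `--taxdump` parameter is an assumption here, and all paths are placeholders.

```bash
# First run: --taxdump points at the raw NCBI taxonomy (directory or .tar.gz),
# and the pipeline publishes the digested JSON under resources/.
nextflow run sanger-tol/blobtoolkit -profile docker \
    --input samplesheet.csv --fasta genome.fasta \
    --taxdump /path/to/databases/taxdump_2024_10 \
    --outdir results

# Later runs: reuse the digested JSON to skip re-parsing the taxonomy
# (assumes --taxdump also accepts this JSON file, per the note above).
nextflow run sanger-tol/blobtoolkit -profile docker \
    --input samplesheet.csv --fasta genome.fasta \
    --taxdump results/resources/new_taxdump.json \
    --outdir results_rerun
```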
### Pipeline information diff --git a/docs/usage.md b/docs/usage.md index 4d0ad33e..3f87c9a3 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,7 +1,5 @@ # sanger-tol/blobtoolkit: Usage -## :warning: Please read this documentation on the nf-core website: [https://pipelines.tol.sanger.ac.uk/blobtoolkit/usage](https://pipelines.tol.sanger.ac.uk/blobtoolkit/usage) - > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ ## Introduction @@ -10,7 +8,7 @@ ## Samplesheet input -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row as shown in the examples below. ```bash --input '[path to samplesheet file]' @@ -21,30 +19,31 @@ You will need to create a samplesheet with information about the samples you wou The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: ```console -sample,datatype,datafile -sample1,hic,hic.cram -sample2,illumina,illumina.cram -sample2,illumina,illumina.cram +sample,datatype,datafile,library_layout +sample1,hic,hic.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED ``` ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 4 columns to match those defined in the table below. A final samplesheet file may look something like the one below. ```console -sample,datatype,datafile -sample1,hic,hic.cram -sample2,illumina,illumina.cram -sample3,ont,ont.cram +sample,datatype,datafile,library_layout +sample1,hic,hic.cram,PAIRED +sample2,illumina,illumina.cram,PAIRED +sample3,ont,ont.cram,SINGLE ``` -| Column | Description | -| ---------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | -| `datatype` | Type of sequencing data. Must be one of `hic`, `illumina`, `pacbio`, `pacbio_clr` or `ont`. | -| `datafile` | Full path to read data file. | +| Column | Description | +| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (\_). | +| `datatype` | Type of sequencing data. 
Must be one of `hic`, `illumina`, `pacbio`, `pacbio_clr` or `ont`. | +| `datafile` | Full path to read data file. | +| `library_layout` | Layout of the library. Must be one of `SINGLE`, `PAIRED`. | An [example samplesheet](assets/test/samplesheet.csv) has been provided with the pipeline. @@ -53,7 +52,40 @@ An [example samplesheet](assets/test/samplesheet.csv) has been provided with the The pipeline can also accept a samplesheet generated by the [nf-core/fetchngs](https://nf-co.re/fetchngs) pipeline (tested with version 1.11.0). The pipeline then needs the `--fetchngs_samplesheet true` option _and_ `--align true`, since the data files would all be unaligned. -## Getting databases ready for the pipeline +### Support for pre-computed `BUSCO` outputs + +The pipeline can optionally be run with a set of pre-computed [`BUSCO`](https://busco.ezlab.org) runs, provided using the `--precomputed_busco` parameter. These can be provided as either a directory path or a `.tar.gz` compressed archive. The contents should be each `run_` output directory (directly from `BUSCO`) named as `run_[odb_database_name]`: + +``` +GCA_922984935.2_busco_output/ +├── run_archaea_odb10 +├── run_bacteria_odb10 +├── run_carnivora_odb10 +├── run_eukaryota_odb10 +├── run_eutheria_odb10 +├── run_laurasiatheria_odb10 +├── run_mammalia_odb10 +├── run_metazoa_odb10 +├── run_tetrapoda_odb10 +└── run_vertebrata_odb10 +``` + +The pipeline minimally requires outputs for the 'basal' lineages (archaea, eukaryota, and bacteria) -- any of these that are not present in the pre-computed outputs will be automatically detected and run. + +## Database parameters + +Configure access to your local databases with the `--busco`, `--blastp`, `--blastx`, `--blastn`, and `--taxdump` parameters. + +Note that `--busco` refers to the download path of _all_ lineages. +Then, when explicitly selecting the lineages to run the pipeline on, +provide the names of these lineages _with_ their `_odb10` suffix as a comma-separated string. +For instance: + +```bash +--busco path-to-databases/busco/ --busco_lineages vertebrata_odb10,bacteria_odb10,fungi_odb10 +``` + +### Getting databases ready for the pipeline The BlobToolKit pipeline can be run in many different ways. The default way requires access to several databases: @@ -64,84 +96,111 @@ The BlobToolKit pipeline can be run in many different ways. The default way requ It is a good idea to put a date suffix for each database location so you know at a glance whether you are using the latest version. We are using the `YYYY_MM` format as we do not expect the databases to be updated more frequently than once a month. However, feel free to use `DATE=YYYY_MM_DD` or a different format if you prefer. -### 1. NCBI taxdump database +Note that all input databases may optionally be passed directly to the pipeline compressed as `.tar.gz`, and the pipeline will handle decompression. +The instructions below show how to build each input database in _two_ forms: decompressed _and_ compressed. You may not need to do both. Select the one that is most appropriate for how you want to use the pipeline. -Create the database directory and move into the directory: +#### 1. 

NCBI taxdump database + +Create the database directory, retrieve and decompress the NCBI taxonomy: ```bash -DATE=2023_03 +DATE=2024_10 TAXDUMP=/path/to/databases/taxdump_${DATE} -mkdir -p $TAXDUMP -cd $TAXDUMP +TAXDUMP_TAR=/path/to/databases/taxdump_${DATE}.tar.gz +mkdir -p "$TAXDUMP" +curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz -o $TAXDUMP_TAR +tar -xzf $TAXDUMP_TAR -C "$TAXDUMP" ``` -Retrieve and decompress the NCBI taxdump: +The first time the pipeline runs, it will generate a file named `resources/taxdump.json` in the results folder. +This JSON file is a digested version of the taxonomy that can then be fed into the pipeline instead of the bare `new_taxdump` directory to make it run faster. -```bash -curl -L ftp://ftp.ncbi.nih.gov/pub/taxonomy/new_taxdump/new_taxdump.tar.gz | tar xzf - -``` - -### 2. NCBI nucleotide BLAST database +#### 2. NCBI nucleotide BLAST database Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 NT=/path/to/databases/nt_${DATE} +NT_TAR=/path/to/databases/nt_${DATE}.tar.gz mkdir -p $NT cd $NT ``` -Retrieve the NCBI blast nt database (version 5) files and tar gunzip them. We are using the `&&` syntax to ensure that each command completes without error before the next one is run: +Retrieve the NCBI blast nt database (version 5) files and tar gunzip them. +`wget` and the use of the FTP protocol are necessary to resolve the wildcard `nt.???.tar.gz`. +We are using the `&&` syntax to ensure that each command completes without error before the next one is run: ```bash wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/v5/nt.???.tar.gz" -P $NT/ && for file in $NT/*.tar.gz; do tar xf $file -C $NT && rm $file; done + +wget "https://ftp.ncbi.nlm.nih.gov/blast/db/v5/taxdb.tar.gz" && +tar xf taxdb.tar.gz -C $NT && +rm taxdb.tar.gz + +# Compress and clean up +cd .. +tar -cvzf $NT_TAR $NT +rm -r $NT ``` -### 3. UniProt reference proteomes database +#### 3. UniProt reference proteomes database -You need [diamond blast](https://github.com/bbuchfink/diamond) installed for this step. The easiest way is probably using [conda](https://anaconda.org/bioconda/diamond). Make sure you have the latest version of Diamond (>2.x.x) otherwise the `--taxonnames` argument may not work. +You need [diamond blast](https://github.com/bbuchfink/diamond) installed for this step. +The easiest way is probably to install their [pre-compiled release](https://github.com/bbuchfink/diamond/releases). +Make sure you have the latest version of Diamond (>2.x.x), otherwise the `--taxonnames` argument may not work. Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 UNIPROT=/path/to/databases/uniprot_${DATE} +UNIPROT_TAR=/path/to/databases/uniprot_${DATE}.tar.gz mkdir -p $UNIPROT cd $UNIPROT ``` -The UniProt `Refseq_Proteomes_YYYY_MM.tar.gz` file is very large (>160 GB) and will take a long time to download. The command below looks complex because it needs to get around the problem of using wildcards with wget and curl. +The UniProt `Refseq_Proteomes_YYYY_MM.tar.gz` file is very large (close to 200 GB) and will take a long time to download. +The command below looks complex because it needs to get around the problem of using wildcards with wget and curl. 

```bash -wget -q -O $UNIPROT/reference_proteomes.tar.gz \ - ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/$(curl \ - -vs ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/ 2>&1 | \ - awk '/tar.gz/ {print $9}') -tar xf reference_proteomes.tar.gz +EBI_URL=ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/reference_proteomes/ +mkdir extract +curl -L $EBI_URL/$(curl -vs $EBI_URL 2>&1 | awk '/tar.gz/ {print $9}') | \ + tar -xzf - -C extract # Create a single fasta file with all the fasta files from each subdirectory: -touch reference_proteomes.fasta.gz -find . -mindepth 2 | grep "fasta.gz" | grep -v 'DNA' | grep -v 'additional' | xargs cat >> reference_proteomes.fasta.gz +find extract -type f -name '*.fasta.gz' ! -name '*_DNA.fasta.gz' ! -name '*_additional.fasta.gz' -exec cat '{}' '+' > reference_proteomes.fasta.gz # create the accession-to-taxid map for all reference proteome sequences: -printf "accession\taccession.version\ttaxid\tgi\n" > reference_proteomes.taxid_map -zcat */*/*.idmapping.gz | grep "NCBI_TaxID" | awk '{print $1 "\t" $1 "\t" $3 "\t" 0}' >> reference_proteomes.taxid_map +find extract -type f -name '*.idmapping.gz' -exec zcat {} + | \ + awk 'BEGIN {OFS="\t"; print "accession", "accession.version", "taxid", "gi"} $2=="NCBI_TaxID" {print $1, $1, $3, 0}' > reference_proteomes.taxid_map # create the taxon aware diamond blast database diamond makedb -p 16 --in reference_proteomes.fasta.gz --taxonmap reference_proteomes.taxid_map --taxonnodes $TAXDUMP/nodes.dmp --taxonnames $TAXDUMP/names.dmp -d reference_proteomes.dmnd + +# clean up +mv extract/{README,STATS} . +rm -r extract +rm -r $TAXDUMP + +# Compress final database and clean up +cd .. +tar -cvzf $UNIPROT_TAR $UNIPROT +rm -r $UNIPROT ``` -### 4. BUSCO databases +#### 4. BUSCO databases Create the database directory and move into the directory: ```bash -DATE=2023_03 +DATE=2024_10 BUSCO=/path/to/databases/busco_${DATE} +BUSCO_TAR=/path/to/databases/busco_${DATE}.tar.gz mkdir -p $BUSCO cd $BUSCO ``` @@ -162,6 +221,13 @@ If you have [GNU parallel](https://www.gnu.org/software/parallel/) installed, yo find v5/data -name "*.tar.gz" | parallel "cd {//}; tar -xzf {/}" ``` +Finally, re-compress and clean up the files: + +```bash +tar -cvzf $BUSCO_TAR $BUSCO +rm -r $BUSCO +``` + ## Changes from Snakemake to Nextflow ### Commands @@ -227,8 +293,8 @@ List of tools for any given dataset can be fetched from the API, for example htt | Dependency | Snakemake | Nextflow | | ----------------- | --------- | -------- | -| blobtoolkit | 4.3.2 | 4.3.9 | -| blast | 2.12.0 | 2.15.0 | +| blobtoolkit | 4.3.2 | 4.4.4 | +| blast | 2.12.0 | 2.14.1 | | blobtk | 0.5.0 | 0.5.1 | | busco | 5.3.2 | 5.5.0 | | diamond | 2.0.15 | 2.1.8 | @@ -269,9 +335,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file `. -:::warning -Do not use `-c ` to specify parameters as this will result in errors. 

Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args). The above pipeline run specified with a params file in yaml format: @@ -279,12 +344,11 @@ The above pipeline run specified with a params file in yaml format: nextflow run sanger-tol/blobtoolkit -profile docker -params-file params.yaml ``` -with `params.yaml` containing: +with: -```yaml +```yaml title="params.yaml" input: './samplesheet.csv' outdir: './results/' -fasta: 'genome.fasta' <...> ``` @@ -300,23 +364,21 @@ nextflow pull sanger-tol/blobtoolkit ### Reproducibility -It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +It is a good idea to specify the pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. First, go to the [sanger-tol/blobtoolkit releases page](https://github.com/sanger-tol/blobtoolkit/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag. This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. For example, at the bottom of the MultiQC reports. -To further assist in reproducbility, you can use share and re-use [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. +To further assist in reproducibility, you can share and reuse [parameter files](#running-the-pipeline) to repeat pipeline runs with the same settings without having to write out a command with every single parameter. -:::tip -If you wish to share such profile (such as upload as supplementary material for academic publications), make sure to NOT include cluster specific paths to files, nor institutional specific profiles. -::: +> [!TIP] +> If you wish to share such a profile (such as uploading it as supplementary material for academic publications), make sure to NOT include cluster-specific paths to files, nor institution-specific profiles. ## Core Nextflow arguments -:::note -These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -::: +> [!NOTE] +> These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). ### `-profile` Use this parameter to choose a configuration profile. Profiles can give configur Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Apptainer, Conda) - see below. 

-:::info -We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -::: +> [!IMPORTANT] +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility; however, when this is not possible, Conda is also supported. -The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to check if your system is supported, please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! They are loaded in sequence, so later profiles can overwrite earlier profiles. -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer enviroment. +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines dependent on the computer environment. - `test` - A profile with a complete configuration for automated testing @@ -350,6 +411,8 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) - `apptainer` - A generic configuration profile to be used with [Apptainer](https://apptainer.org/) +- `wave` + - A generic configuration profile to enable [Wave](https://seqera.io/wave/) containers. Use together with one of the above (requires Nextflow `24.03.0-edge` or later). - `conda` - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter, Charliecloud, or Apptainer. @@ -367,13 +430,13 @@ Specify the path to a specific config file (this is a core Nextflow command). Se ### Resource requests -Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. 

+Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the pipeline steps, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher resource requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. To change the resource requests, please see the [max resources](https://nf-co.re/docs/usage/configuration#max-resources) and [tuning workflow resources](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources) section of the nf-core website. ### Custom Containers -In some cases you may wish to change which container or conda environment a step of the pipeline uses for a particular tool. By default nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However in some cases the pipeline specified version maybe out of date. +In some cases, you may wish to change the container or conda environment used by a pipeline step for a particular tool. By default, nf-core pipelines use containers and software from the [biocontainers](https://biocontainers.pro/) or [bioconda](https://bioconda.github.io/) projects. However, in some cases the pipeline-specified version may be out of date. To use a different container from the default container or conda environment specified in a pipeline, please see the [updating tool versions](https://nf-co.re/docs/usage/configuration#updating-tool-versions) section of the nf-core website. @@ -391,14 +454,6 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -## Azure Resource Requests - -To be used with the `azurebatch` profile by specifying the `-profile azurebatch`. -We recommend providing a compute `params.vm_type` of `Standard_D16_v3` VMs by default but these options can be changed if required. - -Note that the choice of VM size depends on your quota and the overall workload during the analysis. -For a thorough list, please refer the [Azure Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes). - ## Running in the background Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy deleted file mode 100755 index a2210c6e..00000000 --- a/lib/NfcoreTemplate.groovy +++ /dev/null @@ -1,359 +0,0 @@ -// -// This file holds several functions used within the nf-core pipeline template. 

-// - -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx - -class NfcoreTemplate { - - // - // Check AWS Batch related parameters have been specified correctly - // - public static void awsBatch(workflow, params) { - if (workflow.profile.contains('awsbatch')) { - // Check params.awsqueue and params.awsregion have been set if running on AWSBatch - assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!" - // Check outdir paths to be S3 buckets if running on AWSBatch - assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!" - } - } - - // - // Warn if a -profile or Nextflow config has not been provided to run the pipeline - // - public static void checkConfigProvided(workflow, log) { - if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { - log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + - "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + - " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + - " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + - " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + - "Please refer to the quick start section and usage docs for the pipeline.\n " - } - } - - // - // Generate version string - // - public static String version(workflow) { - String version_string = "" - - if (workflow.manifest.version) { - def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' - version_string += "${prefix_v}${workflow.manifest.version}" - } - - if (workflow.commitId) { - def git_shortsha = workflow.commitId.substring(0, 7) - version_string += "-g${git_shortsha}" - } - - return version_string - } - - // - // Construct and send completion email - // - public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) { - - // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['Date Started'] = workflow.start - misc_fields['Date Completed'] = workflow.complete - misc_fields['Pipeline script file path'] = workflow.scriptFile - misc_fields['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision - misc_fields['Nextflow Version'] = workflow.nextflow.version - misc_fields['Nextflow Build'] = workflow.nextflow.build - misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - def email_fields = [:] - email_fields['version'] = NfcoreTemplate.version(workflow) - email_fields['runName'] = workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - 
email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary << misc_fields - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - - // Check if we are only sending emails on failure - def email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$projectDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$projectDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] - def sf = new File("$projectDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - Map colors = logColours(params.monochrome_logs) - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") - sendmail_tf.withWriter { w -> w << sendmail_html } - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-" - } catch (all) { - // Catch failures and try with plaintext - def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ] - if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) { - mail_cmd += [ '-A', mqc_report ] - } - mail_cmd.execute() << email_html - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-" - } - } - - // Write summary e-mail HTML to a file - def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.html"); - output_hf.delete() - - // Write summary e-mail TXT to a file - def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") - output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${params.outdir}/pipeline_info/pipeline_report.txt"); - output_tf.delete() - } - - // - // 
Construct and send a notification to a web server as JSON - // e.g. Microsoft Teams and Slack - // - public static void IM_notification(workflow, params, summary_params, projectDir, log) { - def hook_url = params.hook_url - - def summary = [:] - for (group in summary_params.keySet()) { - summary << summary_params[group] - } - - def misc_fields = [:] - misc_fields['start'] = workflow.start - misc_fields['complete'] = workflow.complete - misc_fields['scriptfile'] = workflow.scriptFile - misc_fields['scriptid'] = workflow.scriptId - if (workflow.repository) misc_fields['repository'] = workflow.repository - if (workflow.commitId) misc_fields['commitid'] = workflow.commitId - if (workflow.revision) misc_fields['revision'] = workflow.revision - misc_fields['nxf_version'] = workflow.nextflow.version - misc_fields['nxf_build'] = workflow.nextflow.build - misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp - - def msg_fields = [:] - msg_fields['version'] = NfcoreTemplate.version(workflow) - msg_fields['runName'] = workflow.runName - msg_fields['success'] = workflow.success - msg_fields['dateComplete'] = workflow.complete - msg_fields['duration'] = workflow.duration - msg_fields['exitStatus'] = workflow.exitStatus - msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - msg_fields['errorReport'] = (workflow.errorReport ?: 'None') - msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") - msg_fields['projectDir'] = workflow.projectDir - msg_fields['summary'] = summary << misc_fields - - // Render the JSON template - def engine = new groovy.text.GStringTemplateEngine() - // Different JSON depending on the service provider - // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format - def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" - def hf = new File("$projectDir/assets/${json_path}") - def json_template = engine.createTemplate(hf).make(msg_fields) - def json_message = json_template.toString() - - // POST - def post = new URL(hook_url).openConnection(); - post.setRequestMethod("POST") - post.setDoOutput(true) - post.setRequestProperty("Content-Type", "application/json") - post.getOutputStream().write(json_message.getBytes("UTF-8")); - def postRC = post.getResponseCode(); - if (! 
postRC.equals(200)) { - log.warn(post.getErrorStream().getText()); - } - } - - // - // Dump pipeline parameters in a json file - // - public static void dump_parameters(workflow, params) { - def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') - def filename = "params_${timestamp}.json" - def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") - def jsonStr = JsonOutput.toJson(params) - temp_pf.text = JsonOutput.prettyPrint(jsonStr) - - FilesEx.copyTo(temp_pf.toPath(), "${params.outdir}/pipeline_info/blobtoolkit/params_${timestamp}.json") - temp_pf.delete() - } - - // - // Print pipeline summary on completion - // - public static void summary(workflow, params, log) { - Map colors = logColours(params.monochrome_logs) - if (workflow.success) { - if (workflow.stats.ignoredCount == 0) { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-" - } - } else { - log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-" - } - } - - // - // ANSII Colours used for terminal logging - // - public static Map logColours(Boolean monochrome_logs) { - Map colorcodes = [:] - - // Reset / Meta - colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" - colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" - colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" - colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" - colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" - colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" - colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" - - // Regular Colors - colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" - colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" - colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" - colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" - colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" - colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" - colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" - colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" - - // Bold - colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" - colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" - colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" - colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" - colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" - colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" - colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" - colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" - - // Underline - colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" - colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" - colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" - colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" - colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" - colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" - colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" - colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" - - // High Intensity - colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" - colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" - colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" - colorcodes['iyellow'] = monochrome_logs ? 
'' : "\033[0;93m" - colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" - colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" - colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" - colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" - - // Bold High Intensity - colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" - colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" - colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" - colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" - colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" - colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" - colorcodes['bicyan'] = monochrome_logs ? '' : "\033[1;96m" - colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" - - return colorcodes - } - - // - // Does what is says on the tin - // - public static String dashedLine(monochrome_logs) { - Map colors = logColours(monochrome_logs) - return "-${colors.dim}----------------------------------------------------${colors.reset}-" - } - - // - // sanger-tol logo - // - public static String logo(workflow, monochrome_logs) { - Map colors = logColours(monochrome_logs) - String workflow_version = NfcoreTemplate.version(workflow) - String.format( - """\n - ${dashedLine(monochrome_logs)} - ${colors.blue} _____ ${colors.green} _______ ${colors.red} _${colors.reset} - ${colors.blue} / ____| ${colors.green}|__ __| ${colors.red}| |${colors.reset} - ${colors.blue} | (___ __ _ _ __ __ _ ___ _ __ ${colors.reset} ___ ${colors.green}| |${colors.yellow} ___ ${colors.red}| |${colors.reset} - ${colors.blue} \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|${colors.reset}|___|${colors.green}| |${colors.yellow}/ _ \\${colors.red}| |${colors.reset} - ${colors.blue} ____) | (_| | | | | (_| | __/ | ${colors.green}| |${colors.yellow} (_) ${colors.red}| |____${colors.reset} - ${colors.blue} |_____/ \\__,_|_| |_|\\__, |\\___|_| ${colors.green}|_|${colors.yellow}\\___/${colors.red}|______|${colors.reset} - ${colors.blue} __/ |${colors.reset} - ${colors.blue} |___/${colors.reset} - ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset} - ${dashedLine(monochrome_logs)} - """.stripIndent() - ) - } -} diff --git a/lib/Utils.groovy b/lib/Utils.groovy deleted file mode 100755 index 8d030f4e..00000000 --- a/lib/Utils.groovy +++ /dev/null @@ -1,47 +0,0 @@ -// -// This file holds several Groovy functions that could be useful for any Nextflow pipeline -// - -import org.yaml.snakeyaml.Yaml - -class Utils { - - // - // When running with -profile conda, warn if channels have not been set-up appropriately - // - public static void checkCondaChannels(log) { - Yaml parser = new Yaml() - def channels = [] - try { - def config = parser.load("conda config --show channels".execute().text) - channels = config.channels - } catch(NullPointerException | IOException e) { - log.warn "Could not verify conda channel configuration." - return - } - - // Check that all channels are present - // This channel list is ordered by required channel priority. 
- def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean - - // Check that they are in the right order - def channel_priority_violation = false - def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } - - if (channels_missing | channel_priority_violation) { - log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + - " There is a problem with your Conda configuration!\n\n" + - " You will need to set-up the conda-forge and bioconda channels correctly.\n" + - " Please refer to https://bioconda.github.io/\n" + - " The observed channel order is \n" + - " ${channels}\n" + - " but the following channel order is required:\n" + - " ${required_channels_in_order}\n" + - "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" - } - } -} diff --git a/lib/WorkflowBlobtoolkit.groovy b/lib/WorkflowBlobtoolkit.groovy deleted file mode 100755 index df59d762..00000000 --- a/lib/WorkflowBlobtoolkit.groovy +++ /dev/null @@ -1,103 +0,0 @@ -// -// This file holds several functions specific to the workflow/blobtoolkit.nf in the sanger-tol/blobtoolkit pipeline -// - -import nextflow.Nextflow -import groovy.text.SimpleTemplateEngine - -class WorkflowBlobtoolkit { - - // - // Check and validate parameters - // - public static void initialise(params, log) { - - - if (!params.fasta) { - Nextflow.error "Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file." - } - } - - // - // Get workflow summary for MultiQC - // - public static String paramsSummaryMultiqc(workflow, summary) { - String summary_section = '' - for (group in summary.keySet()) { - def group_params = summary.get(group) // This gets the parameters of that particular group - if (group_params) { - summary_section += "

$group

\n" - summary_section += "
\n" - for (param in group_params.keySet()) { - summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" - } - summary_section += "
\n" - } - } - - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" - yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" - yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" - yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" - yaml_file_text += "plot_type: 'html'\n" - yaml_file_text += "data: |\n" - yaml_file_text += "${summary_section}" - return yaml_file_text - } - - // - // Generate methods description for MultiQC - // - - public static String toolCitationText(params) { - - // Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def citation_text = [ - "Tools used in the workflow included:", - "MultiQC (Ewels et al. 2016)", - "." - ].join(' ').trim() - - return citation_text - } - - public static String toolBibliographyText(params) { - - // Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report - def reference_text = [ - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() - - return reference_text - } - - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { - // Convert to a named map so can be used as with familar NXF ${workflow} variable syntax in the MultiQC YML file - def meta = [:] - meta.workflow = run_workflow.toMap() - meta["manifest_map"] = run_workflow.manifest.toMap() - - // Pipeline DOI - meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" - meta["nodoi_text"] = meta.manifest_map.doi ? "": "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " - - // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" - - // Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - //meta["tool_bibliography"] = toolBibliographyText(params) - - - def methods_text = mqc_methods_yaml.text - - def engine = new SimpleTemplateEngine() - def description_html = engine.createTemplate(methods_text).make(meta) - - return description_html - }} diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy deleted file mode 100755 index 0723e363..00000000 --- a/lib/WorkflowMain.groovy +++ /dev/null @@ -1,52 +0,0 @@ -// -// This file holds several functions specific to the main.nf workflow in the sanger-tol/blobtoolkit pipeline -// - -import nextflow.Nextflow - -class WorkflowMain { - - // - // Citation string for pipeline - // - public static String citation(workflow) { - return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + - // Add Zenodo DOI for pipeline after first release - "* The pipeline\n" + - " https://doi.org/10.5281/zenodo.7949058\n\n" + - "* The nf-core framework\n" + - " https://doi.org/10.1038/s41587-020-0439-x\n\n" + - "* Software dependencies\n" + - " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" - } - - - // - // Validate parameters and print summary to screen - // - public static void initialise(workflow, params, log) { - - // Print workflow version and exit on --version - if (params.version) { - String workflow_version = NfcoreTemplate.version(workflow) - log.info "${workflow.manifest.name} ${workflow_version}" - System.exit(0) - } - - // Check that a -profile or Nextflow config has been provided to run the pipeline - NfcoreTemplate.checkConfigProvided(workflow, log) - - // Check that conda channels are set-up correctly - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - Utils.checkCondaChannels(log) - } - - // Check AWS batch settings - NfcoreTemplate.awsBatch(workflow, params) - - // Check input has been provided - if (!params.input) { - Nextflow.error("Please provide an input samplesheet to the pipeline e.g. 
'--input samplesheet.csv'") - } - } -} diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar deleted file mode 100644 index 805c8bb5..00000000 Binary files a/lib/nfcore_external_java_deps.jar and /dev/null differ diff --git a/main.nf b/main.nf index fbd0f4ae..5b58469b 100644 --- a/main.nf +++ b/main.nf @@ -8,59 +8,81 @@ ---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl = 2 - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - VALIDATE & PRINT PARAMETER SUMMARY + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp } from 'plugin/nf-validation' - -// Print help message if needed -if (params.help) { - def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) - def citation = '\n' + WorkflowMain.citation(workflow) + '\n' - def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" - log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) - System.exit(0) -} - -// Validate input parameters -if (params.validate_params) { - validateParameters() -} - -WorkflowMain.initialise(workflow, params, log) - +include { BLOBTOOLKIT } from './workflows/blobtoolkit' +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_blobtoolkit_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_blobtoolkit_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOW FOR PIPELINE + NAMED WORKFLOWS FOR PIPELINE ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { BLOBTOOLKIT } from './workflows/blobtoolkit' - // -// WORKFLOW: Run main sanger-tol/blobtoolkit analysis pipeline +// WORKFLOW: Run main analysis pipeline depending on type of input // workflow SANGERTOL_BLOBTOOLKIT { - BLOBTOOLKIT () -} + take: + fasta + databases + + main: + + // + // WORKFLOW: Run pipeline + // + BLOBTOOLKIT ( + fasta, + databases, + ) + emit: + multiqc_report = BLOBTOOLKIT.out.multiqc_report // channel: /path/to/multiqc_report.html +} /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN ALL WORKFLOWS + RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Execute a single named workflow for the pipeline -// See: https://github.com/nf-core/rnaseq/issues/619 -// workflow { - SANGERTOL_BLOBTOOLKIT () + + main: + // + // SUBWORKFLOW: Run initialisation tasks + // + PIPELINE_INITIALISATION ( + params.version, + params.validate_params, + params.monochrome_logs, + args, + params.outdir, + ) + + // + // WORKFLOW: Run main workflow + // + SANGERTOL_BLOBTOOLKIT ( + PIPELINE_INITIALISATION.out.fasta, + PIPELINE_INITIALISATION.out.databases, + ) + // + // SUBWORKFLOW: Run completion tasks + // + PIPELINE_COMPLETION ( + params.email, + params.email_on_fail, + params.plaintext_email, + params.outdir, + params.monochrome_logs, + params.hook_url, + SANGERTOL_BLOBTOOLKIT.out.multiqc_report + ) } /* diff --git a/modules.json b/modules.json index 3ce2a831..37b64ab7 100644 --- a/modules.json +++ b/modules.json @@ -7,106 +7,117 @@ "nf-core": { "blast/blastn": { "branch": "master", - "git_sha": 
"209e5a3e2753c5e628736a662c877c20f341ee15", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/blast/blastn/blast-blastn.diff" }, - "busco": { + "busco/busco": { "branch": "master", - "git_sha": "e3126f437c336c826f242842fe51769cfce0ec2d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], - "patch": "modules/nf-core/busco/busco.diff" + "patch": "modules/nf-core/busco/busco/busco-busco.diff" }, "cat/cat": { "branch": "master", - "git_sha": "9437e6053dccf4aafa022bfd6e7e9de67e625af8", - "installed_by": ["modules"] - }, - "custom/dumpsoftwareversions": { - "branch": "master", - "git_sha": "de45447d060b8c8b98575bc637a4a575fd0638e1", + "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", "installed_by": ["modules"] }, "diamond/blastp": { "branch": "master", - "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/diamond/blastp/diamond-blastp.diff" }, "diamond/blastx": { "branch": "master", - "git_sha": "b29f6beb86d1d24d680277fb1a3f4de7b8b8a92c", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/diamond/blastx/diamond-blastx.diff" }, "fastawindows": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/fastawindows/fastawindows.diff" }, "gunzip": { "branch": "master", - "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "minimap2/align": { "branch": "master", - "git_sha": "2c2d1cf80866dbd6dd0ea5d61ddd59533a72d41e", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", - "git_sha": "b7ebe95761cd389603f9cc0e0dc384c0f663815a", + "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", "installed_by": ["modules"] }, "pigz/compress": { "branch": "master", - "git_sha": "0eab94fc1e48703c1b0a8704bd665f554905c39d", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "samtools/fasta": { - "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", - "installed_by": ["modules"], - "patch": "modules/nf-core/samtools/fasta/samtools-fasta.diff" - }, "samtools/flagstat": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", "installed_by": ["modules"] }, "samtools/index": { "branch": "master", - "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "git_sha": "b13f07be4c508d6ff6312d354d09f2493243e208", "installed_by": ["modules"] }, "samtools/view": { "branch": "master", - "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", - "installed_by": ["modules"] + "git_sha": "2d20463181b1c38981a02e90d3084b5f9fa8d540", + "installed_by": ["modules"], + "patch": "modules/nf-core/samtools/view/samtools-view.diff" }, "seqtk/subseq": { "branch": "master", - "git_sha": "7f88aae93c69586c0789322b77743ee0ef469502", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/seqtk/subseq/seqtk-subseq.diff" }, + "untar": { + "branch": "master", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", + "installed_by": 
["modules"] + }, "windowmasker/mkcounts": { "branch": "master", - "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, "windowmasker/ustat": { "branch": "master", - "git_sha": "32cac29d4a92220965dace68a1fb0bb2e3547cac", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] } } }, "subworkflows": { - "nf-core": {} + "nf-core": { + "utils_nextflow_pipeline": { + "branch": "master", + "git_sha": "c2b22d85f30a706a3073387f30380704fcae013b", + "installed_by": ["subworkflows"] + }, + "utils_nfcore_pipeline": { + "branch": "master", + "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "installed_by": ["subworkflows"] + }, + "utils_nfschema_plugin": { + "branch": "master", + "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "installed_by": ["subworkflows"] + } + } } } } diff --git a/modules/local/blobtoolkit/chunk.nf b/modules/local/blobtoolkit/chunk.nf index 7dad9182..7bc5ac0c 100644 --- a/modules/local/blobtoolkit/chunk.nf +++ b/modules/local/blobtoolkit/chunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_CHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_CHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta) , path(fasta) diff --git a/modules/local/blobtoolkit/countbuscos.nf b/modules/local/blobtoolkit/countbuscos.nf index 1b415504..ec96df6a 100644 --- a/modules/local/blobtoolkit/countbuscos.nf +++ b/modules/local/blobtoolkit/countbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_COUNTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_COUNTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(table, stageAs: 'dir??/*') diff --git a/modules/local/blobtoolkit/createblobdir.nf b/modules/local/blobtoolkit/createblobdir.nf index dfaddb7d..348122f2 100644 --- a/modules/local/blobtoolkit/createblobdir.nf +++ b/modules/local/blobtoolkit/createblobdir.nf @@ -5,14 +5,14 @@ process BLOBTOOLKIT_CREATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." 
} - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(window, stageAs: 'windowstats/*') tuple val(meta1), path(busco, stageAs: 'lineage??/*') tuple val(meta2), path(blastp) tuple val(meta3), path(yaml) - path(taxdump) + path(taxdump, stageAs: 'taxdump/taxdump.json') output: tuple val(meta), path(prefix), emit: blobdir @@ -30,7 +30,7 @@ process BLOBTOOLKIT_CREATEBLOBDIR { blobtools replace \\ --bedtsvdir windowstats \\ --meta ${yaml} \\ - --taxdump ${taxdump} \\ + --taxdump \$(dirname ${taxdump}) \\ --taxrule buscogenes \\ ${busco_args} \\ ${hits_blastp} \\ diff --git a/modules/local/blobtoolkit/extractbuscos.nf b/modules/local/blobtoolkit/extractbuscos.nf index 1e4440cb..d1015fa6 100644 --- a/modules/local/blobtoolkit/extractbuscos.nf +++ b/modules/local/blobtoolkit/extractbuscos.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_EXTRACTBUSCOS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_EXTRACTBUSCOS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(fasta) diff --git a/modules/local/blobtoolkit/metadata.nf b/modules/local/blobtoolkit/metadata.nf deleted file mode 100644 index ffae2a8c..00000000 --- a/modules/local/blobtoolkit/metadata.nf +++ /dev/null @@ -1,33 +0,0 @@ -process BLOBTOOLKIT_METADATA { - tag "$meta.id" - label 'process_single' - - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "BLOBTOOLKIT_METADATA module does not support Conda. Please use Docker / Singularity / Podman instead." - } - container "docker.io/genomehubs/blobtoolkit:4.3.9" - - input: - tuple val(meta), path(yaml) - - output: - tuple val(meta), path("*.metadata.yaml"), emit: yaml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - """ - btk pipeline add-summary-to-metadata \\ - --config ${yaml} \\ - --out ${prefix}.metadata.yaml - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - blobtoolkit: \$(btk --version | cut -d' ' -f2 | sed 's/v//') - END_VERSIONS - """ -} diff --git a/modules/local/blobtoolkit/summary.nf b/modules/local/blobtoolkit/summary.nf index 9b1a262f..9c2713be 100644 --- a/modules/local/blobtoolkit/summary.nf +++ b/modules/local/blobtoolkit/summary.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_SUMMARY { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_SUMMARY module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(blobdir) diff --git a/modules/local/blobtoolkit/unchunk.nf b/modules/local/blobtoolkit/unchunk.nf index 5285b0dc..7c7e3814 100644 --- a/modules/local/blobtoolkit/unchunk.nf +++ b/modules/local/blobtoolkit/unchunk.nf @@ -5,7 +5,7 @@ process BLOBTOOLKIT_UNCHUNK { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UNCHUNK module does not support Conda. Please use Docker / Singularity / Podman instead." 
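// Aside: note the staging trick in the createblobdir hunk above (and the
// updateblobdir hunk below): blobtools expects a taxdump *directory*, but the
// channel now carries a single JSON file, so the module stages it under a fixed
// directory name and hands \$(dirname ...) to --taxdump. A minimal sketch
// (hypothetical process name):
process TAXDUMP_STAGING_DEMO {
    input:
    // One file on the channel, staged at a fixed path inside 'taxdump/'
    path(taxdump, stageAs: 'taxdump/taxdump.json')

    output:
    stdout

    script:
    """
    # dirname resolves to the staged 'taxdump' directory
    echo "--taxdump \$(dirname ${taxdump})"
    """
}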
} - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(blast_table) diff --git a/modules/local/blobtoolkit/updateblobdir.nf b/modules/local/blobtoolkit/updateblobdir.nf index 50167f8b..5bb83dfe 100644 --- a/modules/local/blobtoolkit/updateblobdir.nf +++ b/modules/local/blobtoolkit/updateblobdir.nf @@ -5,13 +5,15 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_BLOBDIR module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(input, stageAs: "input_blobdir") - tuple val(meta1), path(blastx, stageAs: "blastx.txt") - tuple val(meta2), path(blastn, stageAs: "blastn.txt") - path(taxdump) + tuple val(meta2), path(synonyms_tsv) + tuple val(meta3), path(categories_tsv) + tuple val(meta4), path(blastx, stageAs: "blastx.txt") + tuple val(meta5), path(blastn, stageAs: "blastn.txt") + path(taxdump, stageAs: 'taxdump/taxdump.json') output: tuple val(meta), path(prefix), emit: blobdir @@ -25,15 +27,19 @@ process BLOBTOOLKIT_UPDATEBLOBDIR { prefix = task.ext.prefix ?: "${meta.id}" def hits_blastx = blastx ? "--hits ${blastx}" : "" def hits_blastn = blastn ? "--hits ${blastn}" : "" + def syn = synonyms_tsv ? "--synonyms ${synonyms_tsv}" : "" + def cat = categories_tsv ? "--text ${categories_tsv}" : "" + def head = (synonyms_tsv || categories_tsv) ? "--text-header" : "" """ # In-place modifications are not great in Nextflow, so work on a copy of ${input} mkdir ${prefix} cp --preserve=timestamp ${input}/* ${prefix}/ blobtools replace \\ - --taxdump ${taxdump} \\ + --taxdump \$(dirname ${taxdump}) \\ --taxrule bestdistorder=buscoregions \\ ${hits_blastx} \\ ${hits_blastn} \\ + ${syn} ${cat} ${head} \\ --threads ${task.cpus} \\ $args \\ ${prefix} diff --git a/modules/local/blobtoolkit/updatemeta.nf b/modules/local/blobtoolkit/updatemeta.nf index 0c3aaaaf..352cd904 100644 --- a/modules/local/blobtoolkit/updatemeta.nf +++ b/modules/local/blobtoolkit/updatemeta.nf @@ -5,17 +5,11 @@ process BLOBTOOLKIT_UPDATEMETA { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_UPDATEMETA module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(input) - val reads path versions - // The following are passed as "val" because we just want to know the full paths. 
No staging necessary - val blastp - val blastx - val blastn - val taxdump output: tuple val(meta), path("*.json"), emit: json @@ -27,17 +21,11 @@ process BLOBTOOLKIT_UPDATEMETA { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_path ${it[1]}"}.join(' ') """ update_versions.py \\ ${args} \\ --meta_in ${input}/meta.json \\ --software ${versions} \\ - --blastp ${blastp} \\ - --blastx ${blastx} \\ - --blastn ${blastn} \\ - --taxdump ${taxdump} \\ - $input_reads \\ --meta_out ${prefix}.meta.json cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/blobtoolkit/windowstats.nf b/modules/local/blobtoolkit/windowstats.nf index d432a8ff..95a80880 100644 --- a/modules/local/blobtoolkit/windowstats.nf +++ b/modules/local/blobtoolkit/windowstats.nf @@ -4,7 +4,7 @@ process BLOBTOOLKIT_WINDOWSTATS { if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { exit 1, "BLOBTOOLKIT_WINDOWSTATS module does not support Conda. Please use Docker / Singularity / Podman instead." } - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), path(tsv) diff --git a/modules/local/fetchngssamplesheet_check.nf b/modules/local/fetchngssamplesheet_check.nf deleted file mode 100644 index 962768d5..00000000 --- a/modules/local/fetchngssamplesheet_check.nf +++ /dev/null @@ -1,32 +0,0 @@ -process FETCHNGSSAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in sanger-tol/blobtoolkit/bin/ - """ - check_fetchngs_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - check_fetchngs_samplesheet.py: \$(check_fetchngs_samplesheet.py --version | cut -d' ' -f2) - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/generate_config.nf b/modules/local/generate_config.nf index 3aab7ee9..cd3ccb75 100644 --- a/modules/local/generate_config.nf +++ b/modules/local/generate_config.nf @@ -3,19 +3,24 @@ process GENERATE_CONFIG { label 'process_single' conda "conda-forge::requests=2.28.1 conda-forge::pyyaml=6.0" - container "docker.io/genomehubs/blobtoolkit:4.3.9" + container "docker.io/genomehubs/blobtoolkit:4.4.4" input: tuple val(meta), val(fasta) val taxon_query val busco_lin + tuple val(metabn), path(blastn) path lineage_tax_ids - tuple val(meta2), path(blastn) + // The following are passed as "val" because we need to know the original paths. 
Staging would prevent that + val reads + val db_paths output: - tuple val(meta), path("*.yaml"), emit: yaml - tuple val(meta), path("*.csv") , emit: csv - path "versions.yml" , emit: versions + tuple val(meta), path("*.yaml") , emit: yaml + tuple val(meta), path("*.csv") , emit: csv + tuple val(meta), path("*.synonyms.tsv") , emit: synonyms_tsv, optional: true + tuple val(meta), path("*.categories.tsv"), emit: categories_tsv, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,6 +30,9 @@ process GENERATE_CONFIG { def prefix = task.ext.prefix ?: "${meta.id}" def busco_param = busco_lin ? "--busco '${busco_lin}'" : "" def accession_params = params.accession ? "--accession ${params.accession}" : "" + def input_reads = reads.collect{"--read_id ${it[0].id} --read_type ${it[0].datatype} --read_layout ${it[0].layout} --read_path ${it[1]}"}.join(' ') + def input_databases = db_paths.collect{"--${it[0].type} ${it[1]}"}.join(' ') + """ generate_config.py \\ --fasta $fasta \\ @@ -32,9 +40,10 @@ process GENERATE_CONFIG { --lineage_tax_ids $lineage_tax_ids \\ $busco_param \\ $accession_params \\ - --blastn $blastn \\ - --yml_out ${prefix}.yaml \\ - --csv_out ${prefix}.csv + --nt $blastn \\ + $input_reads \\ + $input_databases \\ + --output_prefix ${prefix} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/jsonify_taxdump.nf b/modules/local/jsonify_taxdump.nf new file mode 100644 index 00000000..fdc871fb --- /dev/null +++ b/modules/local/jsonify_taxdump.nf @@ -0,0 +1,32 @@ +process JSONIFY_TAXDUMP { + tag "$meta.id" + label 'process_single' + + conda "conda-forge::requests=2.28.1 conda-forge::pyyaml=6.0" + container "docker.io/genomehubs/blobtoolkit:4.4.4" + + input: + tuple val(meta), path(taxdump) + + output: + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + jsonify_taxdump.py \\ + $taxdump \\ + $args \\ + > ${prefix}.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + jsonify_taxdump.py: \$(jsonify_taxdump.py --version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/local/restructurebuscodir.nf b/modules/local/restructurebuscodir.nf index 4c58b0ed..a8728101 100644 --- a/modules/local/restructurebuscodir.nf +++ b/modules/local/restructurebuscodir.nf @@ -8,7 +8,7 @@ process RESTRUCTUREBUSCODIR { 'nf-core/ubuntu:20.04' }" input: - tuple val(meta), val(lineage), path(batch_summary), path(short_summaries_txt), path(short_summaries_json), path(busco_dir) + tuple val(meta), val(lineage), path(batch_summary), path(short_summary_txt), path(short_summary_json), path(full_table), path(missing_busco_list), path(single_copy_busco_sequences), path(multi_copy_busco_sequences), path(fragmented_busco_sequences), path(hmmer_output) output: tuple val(meta), path("${lineage}"), emit: clean_busco_dir @@ -21,24 +21,23 @@ process RESTRUCTUREBUSCODIR { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" """ - mkdir ${lineage} + mkdir -p ${lineage} - cp --dereference ${batch_summary} ${lineage}/short_summary.tsv - [ -n "${short_summaries_txt}" ] && cp --dereference ${short_summaries_txt} ${lineage}/short_summary.txt - [ -n "${short_summaries_json}" ] && cp --dereference ${short_summaries_json} ${lineage}/short_summary.json + [ -n "${batch_summary}" ] && cp --dereference ${batch_summary} 
${lineage}/short_summary.tsv + [ -n "${short_summary_txt}" ] && cp --dereference ${short_summary_txt} ${lineage}/short_summary.txt + [ -n "${short_summary_json}" ] && cp --dereference ${short_summary_json} ${lineage}/short_summary.json - # Should we compress these ? - [ -e ${busco_dir}/*/run_*/full_table.tsv ] && cp ${busco_dir}/*/run_*/full_table.tsv ${lineage}/ - [ -e ${busco_dir}/*/run_*/missing_busco_list.tsv ] && cp ${busco_dir}/*/run_*/missing_busco_list.tsv ${lineage}/ + [ -e "${full_table}" ] && cp ${full_table} ${lineage}/ + [ -e "${missing_busco_list}" ] && cp ${missing_busco_list} ${lineage}/ - tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences single_copy_busco_sequences - tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences multi_copy_busco_sequences - tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C ${busco_dir}/*/run_*/busco_sequences fragmented_busco_sequences - tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C ${busco_dir}/*/run_* hmmer_output + tar czf ${lineage}/single_copy_busco_sequences.tar.gz -C \$(dirname ${single_copy_busco_sequences}) \$(basename ${single_copy_busco_sequences}) + tar czf ${lineage}/multi_copy_busco_sequences.tar.gz -C \$(dirname ${multi_copy_busco_sequences}) \$(basename ${multi_copy_busco_sequences}) + tar czf ${lineage}/fragmented_busco_sequences.tar.gz -C \$(dirname ${fragmented_busco_sequences}) \$(basename ${fragmented_busco_sequences}) + tar czf ${lineage}/hmmer_output.tar.gz --exclude=.checkpoint -C \$(dirname ${hmmer_output}) \$(basename ${hmmer_output}) cat <<-END_VERSIONS > versions.yml "${task.process}": - tar: \$(tar --version| awk 'NR==1 {print \$3}' ) + tar: \$(tar --version| awk 'NR==1 {print \$4}' ) END_VERSIONS """ } diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf deleted file mode 100644 index 760f3e44..00000000 --- a/modules/local/samplesheet_check.nf +++ /dev/null @@ -1,32 +0,0 @@ -process SAMPLESHEET_CHECK { - tag "$samplesheet" - label 'process_single' - - conda "conda-forge::python=3.9.1" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" - - input: - path samplesheet - - output: - path '*.csv' , emit: csv - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: // This script is bundled with the pipeline, in sanger-tol/blobtoolkit/bin/ - """ - check_samplesheet.py \\ - $samplesheet \\ - samplesheet.valid.csv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - check_samplesheet.py: \$(check_samplesheet.py --version | cut -d' ' -f2) - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/nf-core/blast/blastn/blast-blastn.diff b/modules/nf-core/blast/blastn/blast-blastn.diff index e01e07cb..d4a5a366 100644 --- a/modules/nf-core/blast/blastn/blast-blastn.diff +++ b/modules/nf-core/blast/blastn/blast-blastn.diff @@ -25,7 +25,22 @@ Changes in module 'nf-core/blast/blastn' """ if [ "${is_compressed}" == "true" ]; then -@@ -39,8 +41,15 @@ +@@ -35,12 +37,30 @@ + fi + echo Using \$DB + ++ if [ -n "${taxid}" ]; then ++ # Symlink the tax* files (needed for -taxid options to work) ++ for file in taxdb.btd taxdb.bti taxonomy4blast.sqlite3; do ++ if [ ! 
-f ${db}/\$file ]; then ++ echo "Error: \$file not found in ${db}" ++ exit 1 ++ fi ++ ln -s ${db}/\$file . ++ done ++ fi ++ + blastn \\ -num_threads ${task.cpus} \\ -db \$DB \\ -query ${fasta_name} \\ diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml index cb9b15dd..777e097e 100644 --- a/modules/nf-core/blast/blastn/environment.yml +++ b/modules/nf-core/blast/blastn/environment.yml @@ -1,7 +1,5 @@ -name: blast_blastn channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::blast=2.14.1 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index d674989a..84756771 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -3,8 +3,8 @@ process BLAST_BLASTN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': - 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(fasta) @@ -37,6 +37,17 @@ process BLAST_BLASTN { fi echo Using \$DB + if [ -n "${taxid}" ]; then + # Symlink the tax* files (needed for -taxid options to work) + for file in taxdb.btd taxdb.bti taxonomy4blast.sqlite3; do + if [ ! -f ${db}/\$file ]; then + echo "Error: \$file not found in ${db}" + exit 1 + fi + ln -s ${db}/\$file . + done + fi + blastn \\ -num_threads ${task.cpus} \\ -db \$DB \\ diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml index a0d64dd6..0f5e41bb 100644 --- a/modules/nf-core/blast/blastn/meta.yml +++ b/modules/nf-core/blast/blastn/meta.yml @@ -13,39 +13,42 @@ tools: documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs doi: 10.1016/S0022-2836(05)80360-2 licence: ["US-Government-Work"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing queries sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: directory - description: Directory containing the blast database - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - txt: - type: file - description: File containing blastn hits - pattern: "*.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing blastn hits + pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test index 02ecfab5..aacc93c5 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../makeblastdb/main.nf" process { """ - input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } } @@ -29,7 +29,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } @@ -53,7 +53,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap index d1b5f3f2..dd8b7751 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test.snap +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "versions": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:03.54997013" @@ -10,7 +10,7 @@ "versions_zipped": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:12.925782708" diff --git a/modules/nf-core/busco/busco.diff b/modules/nf-core/busco/busco/busco-busco.diff similarity index 87% rename from modules/nf-core/busco/busco.diff rename to modules/nf-core/busco/busco/busco-busco.diff index 775788fb..1317574a 100644 --- a/modules/nf-core/busco/busco.diff +++ b/modules/nf-core/busco/busco/busco-busco.diff @@ -1,8 +1,8 @@ -Changes in module 'nf-core/busco' ---- modules/nf-core/busco/main.nf -+++ modules/nf-core/busco/main.nf +Changes in module 'nf-core/busco/busco' +--- modules/nf-core/busco/busco/main.nf ++++ modules/nf-core/busco/busco/main.nf @@ -1,6 +1,5 @@ - process BUSCO { + process BUSCO_BUSCO { - tag "$meta.id" - label 'process_medium' + tag "${meta.id}_${lineage}" diff --git a/modules/nf-core/busco/environment.yml b/modules/nf-core/busco/busco/environment.yml similarity index 50% rename from modules/nf-core/busco/environment.yml rename to modules/nf-core/busco/busco/environment.yml index f872d057..5b918b45 100644 --- a/modules/nf-core/busco/environment.yml +++ b/modules/nf-core/busco/busco/environment.yml @@ -1,7 +1,5 @@ -name: busco channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::busco=5.5.0 + - bioconda::busco=5.7.1 diff --git 
a/modules/nf-core/busco/main.nf b/modules/nf-core/busco/busco/main.nf similarity index 80% rename from modules/nf-core/busco/main.nf rename to modules/nf-core/busco/busco/main.nf index 83d8eacd..319f7235 100644 --- a/modules/nf-core/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -1,13 +1,13 @@ -process BUSCO { +process BUSCO_BUSCO { tag "${meta.id}_${lineage}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/busco:5.5.0--pyhdfd78af_0': - 'biocontainers/busco:5.5.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0': + 'biocontainers/busco:5.7.1--pyhdfd78af_0' }" input: - tuple val(meta), path('tmp_input/*') + tuple val(meta), path(fasta, stageAs:'tmp_input/*') val mode // Required: One of genome, proteins, or transcriptome val lineage // Required: lineage to check against, "auto" enables --auto-lineage instead path busco_lineages_path // Recommended: path to busco lineages - downloads if not set @@ -15,13 +15,13 @@ process BUSCO { output: tuple val(meta), path("*-busco.batch_summary.txt") , emit: batch_summary - tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt, optional: true - tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json, optional: true - tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table, optional: true - tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list, optional: true - tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins, optional: true + tuple val(meta), path("short_summary.*.txt") , emit: short_summaries_txt , optional: true + tuple val(meta), path("short_summary.*.json") , emit: short_summaries_json , optional: true + tuple val(meta), path("*-busco/*/run_*/full_table.tsv") , emit: full_table , optional: true + tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv") , emit: missing_busco_list , optional: true + tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins , optional: true tuple val(meta), path("*-busco/*/run_*/busco_sequences") , emit: seq_dir - tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir, optional: true + tuple val(meta), path("*-busco/*/translated_proteins") , emit: translated_dir , optional: true tuple val(meta), path("*-busco") , emit: busco_dir path "versions.yml" , emit: versions @@ -90,4 +90,17 @@ process BUSCO { busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}-${lineage}" + def fasta_name = files(fasta).first().name - '.gz' + """ + touch ${prefix}-busco.batch_summary.txt + mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/busco/busco/meta.yml b/modules/nf-core/busco/busco/meta.yml new file mode 100644 index 00000000..7cb6d69c --- /dev/null +++ b/modules/nf-core/busco/busco/meta.yml @@ -0,0 +1,152 @@ +name: busco_busco +description: Benchmarking Universal Single Copy Orthologs +keywords: + - quality control + - genome + - transcriptome + - proteome +tools: + - busco: + description: BUSCO provides measures for quantitative assessment of genome assembly, + gene set, and 
transcriptome completeness based on evolutionarily informed expectations + of gene content from near-universal single-copy orthologs selected from OrthoDB. + homepage: https://busco.ezlab.org/ + documentation: https://busco.ezlab.org/busco_userguide.html + tool_dev_url: https://gitlab.com/ezlab/busco + doi: "10.1007/978-1-4939-9173-0_14" + licence: ["MIT"] + identifier: biotools:busco +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Nucleic or amino acid sequence file in FASTA format. + pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" + - - mode: + type: string + description: The mode to run Busco in. One of genome, proteins, or transcriptome + pattern: "{genome,proteins,transcriptome}" + - - lineage: + type: string + description: The BUSCO lineage to use, or "auto" to automatically select lineage + - - busco_lineages_path: + type: directory + description: Path to local BUSCO lineages directory. + - - config_file: + type: file + description: Path to BUSCO config file. +output: + - batch_summary: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco.batch_summary.txt": + type: file + description: Summary of all sequence files analyzed + pattern: "*-busco.batch_summary.txt" + - short_summaries_txt: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.txt: + type: file + description: Short Busco summary in plain text format + pattern: "short_summary.*.txt" + - short_summaries_json: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - short_summary.*.json: + type: file + description: Short Busco summary in JSON format + pattern: "short_summary.*.json" + - full_table: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/full_table.tsv": + type: file + description: Full BUSCO results table + pattern: "full_table.tsv" + - missing_busco_list: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/missing_busco_list.tsv": + type: file + description: List of missing BUSCOs + pattern: "missing_busco_list.tsv" + - single_copy_proteins: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/single_copy_proteins.faa": + type: file + description: Fasta file of single copy proteins (transcriptome mode) + pattern: "single_copy_proteins.faa" + - seq_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/run_*/busco_sequences": + type: directory + description: BUSCO sequence directory + pattern: "busco_sequences" + - translated_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*-busco/*/translated_proteins": + type: directory + description: Six frame translations of each transcript made by the transcriptome + mode + pattern: "translated_dir" + - busco_dir: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*-busco": + type: directory + description: BUSCO lineage specific output + pattern: "*-busco" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" +maintainers: + - "@priyanka-surana" + - "@charles-plessy" + - "@mahesh-panchal" + - "@muffato" + - "@jvhagey" + - "@gallvp" diff --git a/modules/nf-core/busco/busco/tests/main.nf.test b/modules/nf-core/busco/busco/tests/main.nf.test new file mode 100644 index 00000000..bb7b49a9 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test @@ -0,0 +1,415 @@ +nextflow_process { + + name "Test Process BUSCO_BUSCO" + script "../main.nf" + process "BUSCO_BUSCO" + + tag "modules" + tag "modules_nfcore" + tag "busco" + tag "busco/busco" + + test("test_busco_genome_single_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues + input[3] = [] // Download busco lineage + input[4] = [] // No config + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + } + + test("test_busco_genome_multi_fasta") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/genome.fasta', checkIfExists: true) + ] + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1][0]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + 
with(path(process.out.short_summaries_txt[0][1][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1][0]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + with(path(process.out.short_summaries_json[0][1][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_metaeuk") { + + config './nextflow.metaeuk.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_eukaryote_augustus") { + + config './nextflow.augustus.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'eukaryota_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + 
then { + assert process.success + + assert snapshot( + process.out.batch_summary[0][1], + process.out.versions[0] + ).match() + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Augustus did not recognize any genes') + + } + + assert process.out.short_summaries_json == [] + assert process.out.short_summaries_txt == [] + assert process.out.missing_busco_list == [] + assert process.out.full_table == [] + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_protein") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/candidatus_portiera_aleyrodidarum/genome/proteome.fasta', checkIfExists: true) + ] + input[1] = 'proteins' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + + assert process.out.single_copy_proteins == [] + assert process.out.translated_dir == [] + } + + } + + test("test_busco_transcriptome") { + + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/illumina/fasta/test1.contigs.fa.gz', checkIfExists: true) + ] + input[1] = 'transcriptome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assert process.success + + with(path(process.out.short_summaries_txt[0][1]).text) { + assert contains('BUSCO version') + assert contains('The lineage dataset is') + assert contains('BUSCO was run in mode') + assert contains('Complete BUSCOs') + assert contains('Missing BUSCOs') + assert contains('Dependencies and versions') + } + + with(path(process.out.short_summaries_json[0][1]).text) { + assert contains('one_line_summary') + assert contains('mode') + assert contains('dataset') + } + + assert snapshot( + process.out.batch_summary[0][1], + process.out.full_table[0][1], + process.out.missing_busco_list[0][1], + process.out.translated_dir[0][1], + process.out.single_copy_proteins[0][1], + process.out.versions[0] + ).match() + + with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) { + assert contains('single_copy_busco_sequences.tar.gz') + assert contains('multi_copy_busco_sequences.tar.gz') + assert 
contains('fragmented_busco_sequences.tar.gz') + } + + with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) { + assert contains('DEBUG:busco.run_BUSCO') + assert contains('Results from dataset') + assert contains('how to cite BUSCO') + } + } + + } + + test("minimal-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true) + ] + input[1] = 'genome' + input[2] = 'bacteria_odb10' + input[3] = [] + input[4] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/busco/busco/tests/main.nf.test.snap b/modules/nf-core/busco/busco/tests/main.nf.test.snap new file mode 100644 index 00000000..1b6411bc --- /dev/null +++ b/modules/nf-core/busco/busco/tests/main.nf.test.snap @@ -0,0 +1,230 @@ +{ + "minimal-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "9": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "batch_summary": [ + [ + { + "id": "test" + }, + "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "busco_dir": [ + [ + { + "id": "test" + }, + [ + [ + [ + [ + + ] + ] + ] + ] + ] + ], + "full_table": [ + + ], + "missing_busco_list": [ + + ], + "seq_dir": [ + [ + { + "id": "test" + }, + [ + + ] + ] + ], + "short_summaries_json": [ + + ], + "short_summaries_txt": [ + + ], + "single_copy_proteins": [ + + ], + "translated_dir": [ + + ], + "versions": [ + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:28:04.451297" + }, + "test_busco_eukaryote_augustus": { + "content": [ + "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:26:36.974986" + }, + "test_busco_genome_single_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:22:45.07816" + }, + "test_busco_genome_multi_fasta": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa", + [ + "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83", + "full_table.tsv:md5,638fe7590f442c57361554dae330eca1" + ], + [ + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a" + ], + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:23:50.255602" + }, + "test_busco_eukaryote_metaeuk": { + "content": [ + 
"test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6", + "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7", + "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:25:38.159041" + }, + "test_busco_transcriptome": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64", + "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d", + "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4", + [ + "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0", + "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195", + "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc", + "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a", + "143460at2.faa:md5,d887431fd988a5556a523440f02d9594", + "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5", + "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67", + "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248", + "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57", + "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5", + "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0", + "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08", + "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5", + "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829", + "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc", + "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999", + "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845", + "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793", + "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d", + "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69", + "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb", + "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c" + ], + "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:53.992893" + }, + "test_busco_protein": { + "content": [ + "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91", + "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8", + "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112", + "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T13:27:12.724862" + } +} \ No newline at end of file diff --git a/modules/nf-core/busco/busco/tests/nextflow.augustus.config b/modules/nf-core/busco/busco/tests/nextflow.augustus.config new file mode 100644 index 00000000..84daa69d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.augustus.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar --augustus' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.config b/modules/nf-core/busco/busco/tests/nextflow.config new file mode 100644 index 00000000..1ec3fec0 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + ext.args = '--tar' + } +} diff --git a/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config new file mode 100644 index 00000000..c1418445 --- /dev/null +++ b/modules/nf-core/busco/busco/tests/nextflow.metaeuk.config @@ -0,0 +1,5 @@ +process { + withName: 'BUSCO_BUSCO' { + 
ext.args = '--tar --metaeuk' + } +} diff --git a/modules/nf-core/busco/busco/tests/old_test.yml b/modules/nf-core/busco/busco/tests/old_test.yml new file mode 100644 index 00000000..75177f5d --- /dev/null +++ b/modules/nf-core/busco/busco/tests/old_test.yml @@ -0,0 +1,624 @@ +- name: busco test_busco_genome_single_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: 
c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "lineage_dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_genome_multi_fasta + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: 8f047bdb33264d22a83920bc2c63f29a + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 8166471fc5f08c82fd5643ab42327f9d + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: ddc508a18f60e7f3314534df50cdf8ca + - path: 
output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: c1fdc6977332f53dfe7f632733bb4585 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 50752acb1c5a20be886bfdfc06635bcb + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa + md5sum: e56fd59c38248dc21ac94355dca98121 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna + md5sum: b365f84bf99c68357952e0b98ed7ce42 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log + md5sum: e5f14d7925ba14a0f9850542f3739894 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log + md5sum: d41971bfc1b621d4ffd2633bc47017ea + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: c9651b88b10871abc260ee655898e828 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log + md5sum: 9caf1a1434414c78562eb0bbb9c0e53f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint + contains: + - "Tool: prodigal" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f 
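For context: the three one-line config files added above (`nextflow.config`, `nextflow.augustus.config`, `nextflow.metaeuk.config`) exist so that each nf-test case can inject tool-specific flags into the module through `ext.args`. Below is a minimal sketch of how a test case would select one of them. The `config` option is standard nf-test, but the test name, data path, and channel wiring are illustrative assumptions only — the input channels merely follow the BUSCO module's documented inputs (fasta, mode, lineage, lineages path, config file), and none of this block is part of the diff itself.

```groovy
// Hypothetical nf-test case: selects the per-test config that sets
// ext.args = '--tar --augustus' for the BUSCO_BUSCO process.
nextflow_process {

    name "Test Process BUSCO_BUSCO"
    script "../main.nf"
    process "BUSCO_BUSCO"
    config "./nextflow.augustus.config"

    test("eukaryote genome, augustus") {
        when {
            process {
                """
                // placeholder test-data path, not taken from this diff
                input[0] = [ [ id:'test' ], file(params.modules_testdata_base_path + 'genomics/eukaryota/genome.fasta', checkIfExists: true) ]
                input[1] = 'genome'          // run mode
                input[2] = 'eukaryota_odb10' // lineage
                input[3] = []                // optional local lineages directory
                input[4] = []                // optional BUSCO config file
                """
            }
        }
        then {
            assert process.success
        }
    }
}
```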
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa + md5sum: 836e9a80d33d8b89168f07ddc13ee991 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna + md5sum: 20eeb75f86842e6e136f02bca8b73a9f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log + md5sum: 538510cfc7483498210f01e53fe035ad + - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log + md5sum: 61050b0706addc9498b2088a2d6efa9a + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv + md5sum: c56edab1dc1522e993c25ae2b730799f + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv + md5sum: b533ef30270f27160acce85a22d01bf5 + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_metaeuk + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_eukaryote_augustus + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt + contains: + - "BUSCO version" + - "The 
lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt + md5sum: ff6d8277e452a83ce9456bbee666feb6 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log + md5sum: e63debaa653f18f7405d936050abc093 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log + contains: + - "metaeuk" + - "easy-predict" + - "Compute score and coverage" + - "Time for processing:" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint + contains: + - "Tool: bbtools" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv + md5sum: bd880e90b9e5620a58943a3e0f9ff16b + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint + contains: + - "Tool: metaeuk" + - "Completed" + - "jobs" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa + md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv + - path: 
output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv + md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988 + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/versions.yml + +- name: busco test_busco_protein + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv + md5sum: 0e34f1011cd83ea1d5d5103ec62b8922 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv + md5sum: 9939309df2da5419de88c32d1435c779 + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt + contains: + - "# BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/versions.yml + +- name: busco test_busco_transcriptome + command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config + tags: + - busco + files: + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt + contains: + - "BUSCO version" + - "The lineage dataset is" + - "BUSCO was run in mode" + - "Complete BUSCOs" + - "Missing BUSCOs" + - "Dependencies and 
versions" + - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt + md5sum: 46118ecf60d1b87d22b96d80f4f03632 + - path: output/busco/test-bacteria_odb10-busco/logs/busco.log + contains: + - "DEBUG:busco.run_BUSCO" + - "Results from dataset" + - "how to cite BUSCO" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint + contains: + - "Tool: makeblastdb" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb + md5sum: 3788c017fe5e6f0f58224e9cdd21822b + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr + md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not + md5sum: 0c340e376c7e85d19f82ec1a833e6a6e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq + md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf + md5sum: de1250813f0c7affc6d12dac9d0fb6bb + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto + md5sum: ff74bd41f9cc9b011c63a32c4f7693bf + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log + contains: + - "# hmmsearch :: search profile(s) against a sequence database" + - "# target sequence database:" + - "Internal pipeline statistics summary:" + - "[ok]" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log + contains: + - "Building a new DB" + - "Adding sequences from FASTA" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint + contains: + - "Tool: tblastn" + - "Completed" + - "jobs" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv + md5sum: cc30eed321944af293452bdbcfc24292 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp + md5sum: 73e9c65fc83fedc58f57f09b08f08238 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp + md5sum: 7fa4cc7955ec0cc36330a221c579b975 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp + md5sum: 6f1601c875d019e3f6f1f98ed8e988d4 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp + md5sum: 3f8e034686cd240c2330650d791bcae2 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp + md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp + md5sum: 7d463e0e6cf7169bc9077d8dc776dda1 + - path: 
output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp + md5sum: 2288edf7fa4f88f51b4cf4d94086f77e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp + md5sum: 029906abbad6d87fc57830dd548cac24 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp + md5sum: 4937f3b348774a31b1160a00297c29cc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp + md5sum: afcb20ba4c466479d6b91c8c62251e1f + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp + md5sum: 2e1e823ce017345bd998191a39fa9924 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp + md5sum: 08c2d82c34ecffbe1c638b410349412e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp + md5sum: cd9b63cf93524284781535c888313764 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp + md5sum: d1929b742b24ebe379bf4801ca882dca + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp + md5sum: 69215765b010c05336538cb322c900b3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp + md5sum: 6feaa1cc3b0899a147ea9d466878f3e3 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp + md5sum: 13625eae14e860a96ce17cd4e37e9d01 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp + md5sum: e14b2484649b0dbc8926815c207b806d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp + md5sum: 6902c93691df00e690faea914c71839e + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp + md5sum: 0a0d9d38a83acbd5ad43c29cdf429988 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv + contains: + - "TBLASTN" + - "BLAST processed" + - "queries" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv + md5sum: 24df25199e13c88bd892fc3e7b541ca0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv + md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json + contains: + - "one_line_summary" + - "mode" + - "dataset" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt + contains: + - "# 
BUSCO version is:" + - "Results:" + - "busco:" + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa + md5sum: e04b9465733577ae6e4bccb7aa01e720 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa + md5sum: 7333c39a20258f20c7019ea0cd83157c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa + md5sum: ebb481e77a824685fbe04d8a2f3a0d7d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa + md5sum: 34621c7d499034e8f8e6b92fd4020a93 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa + md5sum: aa89ca381c1c70c9c4e1380351ca7c2a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa + md5sum: f2e91d78b8dd3722840378789f29e8c8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa + md5sum: 73c25aef5c9cba7f4151804941b146ea + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa + md5sum: cda556018d1f84ebe517e89f6fc107d0 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa + md5sum: a9096c9fb8b25c78a72871ab0463acdc + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa + md5sum: e463d25ce186c0cebfd749474f3a4c64 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa + md5sum: f2cfd241590c6d8377286d6135480937 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa + md5sum: 586569546fb9861502468e3d9ba2775c + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa + md5sum: 24c658bee14ad84b062d81ad96642eb8 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa + md5sum: 0b8e26ddf5149bbd8805be7af125208d + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa + md5sum: 159320712ee01fb2ccb31a25df44eead + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa + md5sum: 812629c0b06ac3d18661c2ca78de0c08 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa + md5sum: f7ff4e1591342d30b77392a2e84b57d9 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa + md5sum: 7b34a24fc49c540d46fcf96ff5129564 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa + md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa + md5sum: 613af7a3fea30ea2bece66f603b9284a + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa + md5sum: a7cd1b13c9ef91c7ef4e31614166f197 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa + md5sum: fe313ffd5efdb0fed887a04fba352552 + - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa + md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0 + - path: output/busco/versions.yml diff --git a/modules/nf-core/busco/busco/tests/tags.yml b/modules/nf-core/busco/busco/tests/tags.yml new file mode 100644 index 00000000..7c4d2835 --- /dev/null 
+++ b/modules/nf-core/busco/busco/tests/tags.yml @@ -0,0 +1,2 @@ +busco/busco: + - "modules/nf-core/busco/busco/**" diff --git a/modules/nf-core/busco/meta.yml b/modules/nf-core/busco/meta.yml deleted file mode 100644 index 90b30d4d..00000000 --- a/modules/nf-core/busco/meta.yml +++ /dev/null @@ -1,96 +0,0 @@ -name: busco -description: Benchmarking Universal Single Copy Orthologs -keywords: - - quality control - - genome - - transcriptome - - proteome -tools: - - busco: - description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB. - homepage: https://busco.ezlab.org/ - documentation: https://busco.ezlab.org/busco_userguide.html - tool_dev_url: https://gitlab.com/ezlab/busco - doi: "10.1007/978-1-4939-9173-0_14" - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Nucleic or amino acid sequence file in FASTA format. - pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}" - - mode: - type: string - description: The mode to run Busco in. One of genome, proteins, or transcriptome - pattern: "{genome,proteins,transcriptome}" - - lineage: - type: string - description: The BUSCO lineage to use, or "auto" to automatically select lineage - - busco_lineages_path: - type: directory - description: Path to local BUSCO lineages directory. - - config_file: - type: file - description: Path to BUSCO config file. -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - batch_summary: - type: file - description: Summary of all sequence files analyzed - pattern: "*-busco.batch_summary.txt" - - short_summaries_txt: - type: file - description: Short Busco summary in plain text format - pattern: "short_summary.*.txt" - - short_summaries_json: - type: file - description: Short Busco summary in JSON format - pattern: "short_summary.*.json" - - busco_dir: - type: directory - description: BUSCO lineage specific output - pattern: "*-busco" - - full_table: - type: file - description: Full BUSCO results table - pattern: "full_table.tsv" - - missing_busco_list: - type: file - description: List of missing BUSCOs - pattern: "missing_busco_list.tsv" - - single_copy_proteins: - type: file - description: Fasta file of single copy proteins (transcriptome mode) - pattern: "single_copy_proteins.faa" - - seq_dir: - type: directory - description: BUSCO sequence directory - pattern: "busco_sequences" - - translated_proteins: - type: directory - description: Six frame translations of each transcript made by the transcriptome mode - pattern: "translated_proteins" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" -maintainers: - - "@priyanka-surana" - - "@charles-plessy" - - "@mahesh-panchal" - - "@muffato" - - "@jvhagey" diff --git a/modules/nf-core/cat/cat/environment.yml b/modules/nf-core/cat/cat/environment.yml index 17a04ef2..9b01c865 100644 --- a/modules/nf-core/cat/cat/environment.yml +++ b/modules/nf-core/cat/cat/environment.yml @@ -1,7 +1,5 @@ -name: cat_cat channels: - conda-forge - bioconda - - defaults dependencies: - conda-forge::pigz=2.3.4 diff --git 
a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf index adbdbd7b..2862c64c 100644 --- a/modules/nf-core/cat/cat/main.nf +++ b/modules/nf-core/cat/cat/main.nf @@ -76,4 +76,3 @@ def getFileSuffix(filename) { def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/ return match ? match[0][1] : filename.substring(filename.lastIndexOf('.')) } - diff --git a/modules/nf-core/cat/cat/meta.yml b/modules/nf-core/cat/cat/meta.yml index 00a8db0b..81778a06 100644 --- a/modules/nf-core/cat/cat/meta.yml +++ b/modules/nf-core/cat/cat/meta.yml @@ -9,25 +9,32 @@ tools: description: Just concatenation documentation: https://man7.org/linux/man-pages/man1/cat.1.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - files_in: - type: file - description: List of compressed / uncompressed files - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - files_in: + type: file + description: List of compressed / uncompressed files + pattern: "*" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - file_out: - type: file - description: Concatenated file. Will be gzipped if file_out ends with ".gz" - pattern: "${file_out}" + - meta: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - ${prefix}: + type: file + description: Concatenated file. Will be gzipped if file_out ends with ".gz" + pattern: "${file_out}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@erikrikarddaniel" - "@FriederikeHanssen" diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test index fcee2d19..9cb16178 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test +++ b/modules/nf-core/cat/cat/tests/main.nf.test @@ -29,7 +29,8 @@ nextflow_process { then { assertAll( { assert !process.success }, - { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") } + { assert process.stdout.toString().contains("The name of the input file can't be the same as for the output prefix") }, + { assert snapshot(process.out.versions).match() } ) } } @@ -83,8 +84,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_zipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_zipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -142,8 +147,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_unzipped_zipped_size")} + { assert snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } @@ -170,8 +179,12 @@ nextflow_process { def lines = path(process.out.file_out.get(0).get(1)).linesGzip assertAll( { assert process.success }, - { assert snapshot(lines[0..5]).match("test_cat_one_file_unzipped_zipped_lines") }, - { assert snapshot(lines.size()).match("test_cat_one_file_unzipped_zipped_size")} + { assert 
snapshot( + lines[0..5], + lines.size(), + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/cat/cat/tests/main.nf.test.snap b/modules/nf-core/cat/cat/tests/main.nf.test.snap index 423571ba..b7623ee6 100644 --- a/modules/nf-core/cat/cat/tests/main.nf.test.snap +++ b/modules/nf-core/cat/cat/tests/main.nf.test.snap @@ -1,10 +1,4 @@ { - "test_cat_unzipped_zipped_size": { - "content": [ - 375 - ], - "timestamp": "2023-10-16T14:33:08.049445686" - }, "test_cat_unzipped_unzipped": { "content": [ { @@ -34,6 +28,10 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:18.500464399" }, "test_cat_zipped_unzipped": { @@ -65,9 +63,13 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-10-16T14:32:49.642741302" }, - "test_cat_zipped_zipped_lines": { + "test_cat_zipped_zipped": { "content": [ [ "MT192765.1\tGenbank\ttranscript\t259\t29667\t.\t+\t.\tID=unknown_transcript_1;geneID=orf1ab;gene_name=orf1ab", @@ -76,11 +78,31 @@ "MT192765.1\tGenbank\tCDS\t13461\t21548\t.\t+\t0\tParent=unknown_transcript_1;exception=\"ribosomal slippage\";gbkey=CDS;gene=orf1ab;note=\"pp1ab;translated=by -1 ribosomal frameshift\";product=\"orf1ab polyprotein\";protein_id=QIK50426.1", "MT192765.1\tGenbank\tCDS\t21556\t25377\t.\t+\t0\tParent=unknown_transcript_1;gbkey=CDS;gene=S;note=\"structural protein\";product=\"surface glycoprotein\";protein_id=QIK50427.1", "MT192765.1\tGenbank\tgene\t21556\t25377\t.\t+\t.\tParent=unknown_transcript_1" + ], + 78, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:46.802978" + }, + "test_cat_name_conflict": { + "content": [ + [ + ] ], - "timestamp": "2023-10-16T14:32:33.629048645" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:51:29.45394" }, - "test_cat_unzipped_zipped_lines": { + "test_cat_one_file_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -89,11 +111,19 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 374, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:08.038830506" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:52:02.774016" }, - "test_cat_one_file_unzipped_zipped_lines": { + "test_cat_unzipped_zipped": { "content": [ [ ">MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate SARS-CoV-2/human/USA/PC00101P/2020, complete genome", @@ -102,20 +132,16 @@ "TAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTTGTCCGG", "GTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTT", "ACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAG" + ], + 375, + [ + "versions.yml:md5,115ed6177ebcff24eb99d503fa5ef894" ] ], - "timestamp": "2023-10-16T14:33:21.39642399" - }, - "test_cat_zipped_zipped_size": { - "content": [ - 78 - ], - "timestamp": "2023-10-16T14:32:33.641869244" - }, - "test_cat_one_file_unzipped_zipped_size": { - "content": [ - 374 - ], - "timestamp": "2023-10-16T14:33:21.4094373" + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"24.04.3" + }, + "timestamp": "2024-07-22T11:51:57.581523" } } \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml deleted file mode 100644 index b48ced26..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: custom_dumpsoftwareversions -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::multiqc=1.20 diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf deleted file mode 100644 index 105f9265..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/main.nf +++ /dev/null @@ -1,24 +0,0 @@ -process CUSTOM_DUMPSOFTWAREVERSIONS { - label 'process_single' - - // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' : - 'biocontainers/multiqc:1.20--pyhdfd78af_0' }" - - input: - path versions - - output: - path "software_versions.yml" , emit: yml - path "software_versions_mqc.yml", emit: mqc_yml - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - template 'dumpsoftwareversions.py' -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml deleted file mode 100644 index 5f15a5fd..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/meta.yml +++ /dev/null @@ -1,37 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: custom_dumpsoftwareversions -description: Custom module used to dump software versions within the nf-core pipeline template -keywords: - - custom - - dump - - version -tools: - - custom: - description: Custom module used to dump software versions within the nf-core pipeline template - homepage: https://github.com/nf-core/tools - documentation: https://github.com/nf-core/tools - licence: ["MIT"] -input: - - versions: - type: file - description: YML file containing software versions - pattern: "*.yml" -output: - - yml: - type: file - description: Standard YML file containing software versions - pattern: "software_versions.yml" - - mqc_yml: - type: file - description: MultiQC custom content YML file containing software versions - pattern: "software_versions_mqc.yml" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@grst" -maintainers: - - "@drpatelh" - - "@grst" diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py deleted file mode 100755 index da033408..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env python - - -"""Provide functions to merge multiple versions.yml files.""" - - -import yaml -import platform -from textwrap import dedent - - -def _make_versions_html(versions): - """Generate a tabular HTML output of all versions for MultiQC.""" - html = [ - dedent( - """\\ - - - - - - - - - - """ - ) - ] - for process, tmp_versions in 
sorted(versions.items()): - html.append("") - for i, (tool, version) in enumerate(sorted(tmp_versions.items())): - html.append( - dedent( - f"""\\ - - - - - - """ - ) - ) - html.append("") - html.append("
    Process Name Software Version
    {process if (i == 0) else ''}{tool}{version}
    ") - return "\\n".join(html) - - -def main(): - """Load all version files and generate merged output.""" - versions_this_module = {} - versions_this_module["${task.process}"] = { - "python": platform.python_version(), - "yaml": yaml.__version__, - } - - with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module - - # aggregate versions by the module name (derived from fully-qualified process name) - versions_by_module = {} - for process, process_versions in versions_by_process.items(): - module = process.split(":")[-1] - try: - if versions_by_module[module] != process_versions: - raise AssertionError( - "We assume that software versions are the same between all modules. " - "If you see this error-message it means you discovered an edge-case " - "and should open an issue in nf-core/tools. " - ) - except KeyError: - versions_by_module[module] = process_versions - - versions_by_module["Workflow"] = { - "Nextflow": "$workflow.nextflow.version", - "$workflow.manifest.name": "$workflow.manifest.version", - } - - versions_mqc = { - "id": "software_versions", - "section_name": "${workflow.manifest.name} Software Versions", - "section_href": "https://github.com/${workflow.manifest.name}", - "plot_type": "html", - "description": "are collected at run time from the software output.", - "data": _make_versions_html(versions_by_module), - } - - with open("software_versions.yml", "w") as f: - yaml.dump(versions_by_module, f, default_flow_style=False) - with open("software_versions_mqc.yml", "w") as f: - yaml.dump(versions_mqc, f, default_flow_style=False) - - with open("versions.yml", "w") as f: - yaml.dump(versions_this_module, f, default_flow_style=False) - - -if __name__ == "__main__": - main() diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test deleted file mode 100644 index b1e1630b..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test +++ /dev/null @@ -1,43 +0,0 @@ -nextflow_process { - - name "Test Process CUSTOM_DUMPSOFTWAREVERSIONS" - script "../main.nf" - process "CUSTOM_DUMPSOFTWAREVERSIONS" - tag "modules" - tag "modules_nfcore" - tag "custom" - tag "dumpsoftwareversions" - tag "custom/dumpsoftwareversions" - - test("Should run without failures") { - when { - process { - """ - def tool1_version = ''' - TOOL1: - tool1: 0.11.9 - '''.stripIndent() - - def tool2_version = ''' - TOOL2: - tool2: 1.9 - '''.stripIndent() - - input[0] = Channel.of(tool1_version, tool2_version).collectFile() - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot( - process.out.versions, - file(process.out.mqc_yml[0]).readLines()[0..10], - file(process.out.yml[0]).readLines()[0..7] - ).match() - } - ) - } - } -} diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap b/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap deleted file mode 100644 index 5f59a936..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/main.nf.test.snap +++ /dev/null @@ -1,33 +0,0 @@ -{ - "Should run without failures": { - "content": [ - [ - "versions.yml:md5,76d454d92244589d32455833f7c1ba6d" - ], - [ - "data: \"\\n\\n \\n \\n \\n \\n \\n \\n \\n\\", - " \\n\\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n \\n \\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\", - " \\ \\n\\n\\n\\n\\n\\n \\n\\", - " \\ \\n \\n\\n\\n\\n\\", - " \\n\\n \\n \\n\\" - ], - [ - "CUSTOM_DUMPSOFTWAREVERSIONS:", - " python: 
3.11.7", - " yaml: 5.4.1", - "TOOL1:", - " tool1: 0.11.9", - "TOOL2:", - " tool2: '1.9'", - "Workflow:" - ] - ], - "timestamp": "2024-01-09T23:01:18.710682" - } -} \ No newline at end of file diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml deleted file mode 100644 index 405aa24a..00000000 --- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -custom/dumpsoftwareversions: - - modules/nf-core/custom/dumpsoftwareversions/** diff --git a/modules/nf-core/diamond/blastp/environment.yml b/modules/nf-core/diamond/blastp/environment.yml index 922ea7ed..950c3c5c 100644 --- a/modules/nf-core/diamond/blastp/environment.yml +++ b/modules/nf-core/diamond/blastp/environment.yml @@ -1,7 +1,5 @@ -name: diamond_blastp channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/blastp/meta.yml b/modules/nf-core/diamond/blastp/meta.yml index bab6801e..fbddfbd0 100644 --- a/modules/nf-core/diamond/blastp/meta.yml +++ b/modules/nf-core/diamond/blastp/meta.yml @@ -13,77 +13,116 @@ tools: tool_dev_url: https://github.com/bbuchfink/diamond doi: "10.1038/s41592-021-01101-x" licence: ["GPL v3.0"] + identifier: biotools:diamond input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing query sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: file - description: File of the indexed DIAMOND database - pattern: "*.dmnd" - - out_ext: - type: string - description: | - Specify the type of output file to be generated. `blast` corresponds to - BLAST pairwise format. `xml` corresponds to BLAST xml format. - `txt` corresponds to to BLAST tabular format. `tsv` corresponds to - taxonomic classification format. - pattern: "blast|xml|txt|daa|sam|tsv|paf" - - blast_columns: - type: string - description: | - Optional space separated list of DIAMOND tabular BLAST output keywords - used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: - qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing query sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `blast` corresponds to + BLAST pairwise format. `xml` corresponds to BLAST xml format. + `txt` corresponds to to BLAST tabular format. `tsv` corresponds to + taxonomic classification format. + pattern: "blast|xml|txt|daa|sam|tsv|paf" + - - blast_columns: + type: string + description: | + Optional space separated list of DIAMOND tabular BLAST output keywords + used for in conjunction with the 'txt' out_ext option (--outfmt 6). 
Options: + qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - blast: - type: file - description: File containing blastp hits - pattern: "*.{blast}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.blast": + type: file + description: File containing blastp hits + pattern: "*.{blast}" - xml: - type: file - description: File containing blastp hits - pattern: "*.{xml}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.xml": + type: file + description: File containing blastp hits + pattern: "*.{xml}" - txt: - type: file - description: File containing hits in tabular BLAST format. - pattern: "*.{txt}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing hits in tabular BLAST format. + pattern: "*.{txt}" - daa: - type: file - description: File containing hits DAA format - pattern: "*.{daa}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.daa": + type: file + description: File containing hits DAA format + pattern: "*.{daa}" - sam: - type: file - description: File containing aligned reads in SAM format - pattern: "*.{sam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: File containing aligned reads in SAM format + pattern: "*.{sam}" - tsv: - type: file - description: Tab separated file containing taxonomic classification of hits - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: Tab separated file containing taxonomic classification of hits + pattern: "*.{tsv}" - paf: - type: file - description: File containing aligned reads in pairwise mapping format format - pattern: "*.{paf}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.paf": + type: file + description: File containing aligned reads in pairwise mapping format format + pattern: "*.{paf}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@spficklin" - "@jfy133" diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test b/modules/nf-core/diamond/blastp/tests/main.nf.test index 672bf050..f21e926d 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "diamond" + tag "diamond/makedb" tag "diamond/blastp" setup { @@ -13,7 +14,7 @@ nextflow_process { script "../../makedb/main.nf" process { """ - input[0] = [ [id:'test2'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[0] = [ [id:'test2'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] input[1] = [] input[2] = [] input[3] = [] @@ -30,7 +31,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'txt' input[3] = 'qseqid qlen' @@ -41,8 +42,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("txt") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + } ) } @@ -56,7 +60,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta.gz', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'txt' input[3] = 'qseqid qlen' @@ -67,8 +71,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("gz_txt") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match("gz_txt") + } ) } @@ -82,7 +89,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'daa' input[3] = [] @@ -94,7 +101,7 @@ nextflow_process { assertAll( { assert process.success }, { assert process.out.daa }, - { assert process.out.versions } + { assert snapshot(process.out.versions).match() } ) } diff --git a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap index 83575bc1..e323c8b8 100644 --- a/modules/nf-core/diamond/blastp/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastp/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "txt": { + "Should search for protein hits against a DIAMOND db and return a tab separated output file of hits": { "content": [ [ [ @@ -8,9 +8,16 @@ }, "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" ] + ], + [ + 
"versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" ] ], - "timestamp": "2023-11-07T10:27:02.453987512" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:23.906848" }, "gz_txt": { "content": [ @@ -21,8 +28,27 @@ }, "test.txt:md5,8131b1afd717f3d5f2f2417c5b562e6e" ] + ], + [ + "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:29.865487" + }, + "Should search for protein hits against a DIAMOND db and return a daa format file of hits": { + "content": [ + [ + "versions.yml:md5,57a0ebeb0a8a732c941ae0102639a9d0" ] ], - "timestamp": "2023-11-07T09:41:13.934994026" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:40:35.362027" } } \ No newline at end of file diff --git a/modules/nf-core/diamond/blastx/environment.yml b/modules/nf-core/diamond/blastx/environment.yml index 70d6857d..950c3c5c 100644 --- a/modules/nf-core/diamond/blastx/environment.yml +++ b/modules/nf-core/diamond/blastx/environment.yml @@ -1,7 +1,5 @@ -name: diamond_blastx channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::diamond=2.1.8 diff --git a/modules/nf-core/diamond/blastx/meta.yml b/modules/nf-core/diamond/blastx/meta.yml index 17106548..a5c05875 100644 --- a/modules/nf-core/diamond/blastx/meta.yml +++ b/modules/nf-core/diamond/blastx/meta.yml @@ -13,81 +13,126 @@ tools: tool_dev_url: https://github.com/bbuchfink/diamond doi: "10.1038/s41592-021-01101-x" licence: ["GPL v3.0"] + identifier: biotools:diamond input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing query sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: file - description: File of the indexed DIAMOND database - pattern: "*.dmnd" - - out_ext: - type: string - description: | - Specify the type of output file to be generated. `blast` corresponds to - BLAST pairwise format. `xml` corresponds to BLAST xml format. - `txt` corresponds to to BLAST tabular format. `tsv` corresponds to - taxonomic classification format. - pattern: "blast|xml|txt|daa|sam|tsv|paf" - - blast_columns: - type: string - description: | - Optional space separated list of DIAMOND tabular BLAST output keywords - used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: - qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing query sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: file + description: File of the indexed DIAMOND database + pattern: "*.dmnd" + - - out_ext: + type: string + description: | + Specify the type of output file to be generated. `blast` corresponds to + BLAST pairwise format. `xml` corresponds to BLAST xml format. + `txt` corresponds to to BLAST tabular format. `tsv` corresponds to + taxonomic classification format. 
+ pattern: "blast|xml|txt|daa|sam|tsv|paf" + - - blast_columns: + type: string + description: | + Optional space separated list of DIAMOND tabular BLAST output keywords + used for in conjunction with the 'txt' out_ext option (--outfmt 6). Options: + qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - blast: - type: file - description: File containing blastp hits - pattern: "*.{blast}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.blast": + type: file + description: File containing blastp hits + pattern: "*.{blast}" - xml: - type: file - description: File containing blastp hits - pattern: "*.{xml}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.xml": + type: file + description: File containing blastp hits + pattern: "*.{xml}" - txt: - type: file - description: File containing hits in tabular BLAST format. - pattern: "*.{txt}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing hits in tabular BLAST format. + pattern: "*.{txt}" - daa: - type: file - description: File containing hits DAA format - pattern: "*.{daa}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.daa": + type: file + description: File containing hits DAA format + pattern: "*.{daa}" - sam: - type: file - description: File containing aligned reads in SAM format - pattern: "*.{sam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: File containing aligned reads in SAM format + pattern: "*.{sam}" - tsv: - type: file - description: Tab separated file containing taxonomic classification of hits - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tsv": + type: file + description: Tab separated file containing taxonomic classification of hits + pattern: "*.{tsv}" - paf: - type: file - description: File containing aligned reads in pairwise mapping format format - pattern: "*.{paf}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: File containing aligned reads in pairwise mapping format format + pattern: "*.{paf}" - log: - type: file - description: Log file containing stdout information - pattern: "*.{log}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing stdout information + pattern: "*.{log}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@spficklin" - "@jfy133" diff --git a/modules/nf-core/diamond/blastx/tests/main.nf.test b/modules/nf-core/diamond/blastx/tests/main.nf.test index a367f883..b8757403 100644 --- a/modules/nf-core/diamond/blastx/tests/main.nf.test +++ b/modules/nf-core/diamond/blastx/tests/main.nf.test @@ -6,6 +6,7 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "diamond" + tag "diamond/makedb" tag "diamond/blastx" setup { @@ -13,7 +14,7 @@ nextflow_process { script "../../makedb/main.nf" process { """ - input[0] = [ [id:'test2'], [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] ] + input[0] = [ [id:'test2'], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/proteome.fasta', checkIfExists: true) ] ] input[1] = [] input[2] = [] input[3] = [] @@ -30,7 +31,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'tfdfdt' // Nonsense file extension to check default case. input[3] = 'qseqid qlen' @@ -41,9 +42,12 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.txt).match("txt") }, { assert path(process.out.log.get(0).get(1)).readLines().contains("11 queries aligned.") }, - { assert process.out.versions } + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + } ) } @@ -57,7 +61,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['transcriptome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/transcriptome.fasta', checkIfExists: true) ] input[1] = DIAMOND_MAKEDB.out.db input[2] = 'daa' input[3] = [] @@ -70,7 +74,7 @@ nextflow_process { { assert process.success }, { assert process.out.daa }, { assert path(process.out.log.get(0).get(1)).readLines().contains("11 queries aligned.") }, - { assert process.out.versions } + { assert snapshot(process.out.versions).match() } ) } diff --git a/modules/nf-core/diamond/blastx/tests/main.nf.test.snap b/modules/nf-core/diamond/blastx/tests/main.nf.test.snap index 27fb0a31..8a128c99 100644 --- a/modules/nf-core/diamond/blastx/tests/main.nf.test.snap +++ b/modules/nf-core/diamond/blastx/tests/main.nf.test.snap @@ -1,5 +1,17 @@ { - "txt": { + "Should search for transcriptome hits against a DIAMOND db and return the daa format output file of hits": { + "content": [ + [ + "versions.yml:md5,733d944fb173eaf1d1cdf280cd1b3b9c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:49:26.843747" + }, + "Should search for transcriptome hits against a DIAMOND db and return the default tab separated output file of hits": { "content": [ [ [ @@ -8,8 +20,15 @@ }, "test.txt:md5,33dc682dabfa44c7089abbc8fe8b84e4" ] + ], + [ + "versions.yml:md5,733d944fb173eaf1d1cdf280cd1b3b9c" ] ], - "timestamp": "2023-11-07T09:42:36.646074348" + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-29T14:49:20.769093" } } \ No newline at end of file diff --git a/modules/nf-core/fastawindows/environment.yml b/modules/nf-core/fastawindows/environment.yml index ce557158..6775fb2e 100644 --- a/modules/nf-core/fastawindows/environment.yml +++ b/modules/nf-core/fastawindows/environment.yml @@ -1,7 +1,5 @@ -name: fastawindows channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fasta_windows=0.2.4 diff --git a/modules/nf-core/fastawindows/main.nf b/modules/nf-core/fastawindows/main.nf index 40b28436..dcbf696a 100644 --- a/modules/nf-core/fastawindows/main.nf +++ b/modules/nf-core/fastawindows/main.nf @@ -31,6 +31,25 @@ process FASTAWINDOWS { --fasta $fasta \\ --output ${prefix} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fasta_windows: \$(fasta_windows --version | cut -d' ' -f3) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir -p fw_out + + touch fw_out/${prefix}_freq_windows.tsv + touch fw_out/${prefix}_mononuc_windows.tsv + touch fw_out/${prefix}_dinuc_windows.tsv + touch fw_out/${prefix}_trinuc_windows.tsv + touch fw_out/${prefix}_tetranuc_windows.tsv + + cat <<-END_VERSIONS > versions.yml "${task.process}": fasta_windows: \$(fasta_windows --version | cut -d' ' -f3) diff --git a/modules/nf-core/fastawindows/meta.yml b/modules/nf-core/fastawindows/meta.yml index 494cc1b6..f8ee0043 100644 --- a/modules/nf-core/fastawindows/meta.yml +++ b/modules/nf-core/fastawindows/meta.yml @@ -7,49 +7,78 @@ keywords: - bed tools: - "fastawindows": - description: "fasta_windows is a tool written for Darwin Tree of Life chromosomal level genome assemblies. The executable takes a fasta formatted file and calculates some statistics of interest in windows" + description: "fasta_windows is a tool written for Darwin Tree of Life chromosomal + level genome assemblies. The executable takes a fasta formatted file and calculates + some statistics of interest in windows" homepage: "https://github.com/tolkit/fasta_windows" - licence: "['MIT']" + licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: FASTA file - pattern: "*.{fa,fasta,fna}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - freq: - type: file - description: TSV file with frequencies and statistics - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_freq_windows.tsv: + type: file + description: TSV file with frequencies and statistics + pattern: "*.{tsv}" - mononuc: - type: file - description: TSV file with mononucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fw_out/*_mononuc_windows.tsv: + type: file + description: TSV file with mononucleotide counts + pattern: "*.{tsv}" - dinuc: - type: file - description: TSV file with dinucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_dinuc_windows.tsv: + type: file + description: TSV file with dinucleotide counts + pattern: "*.{tsv}" - trinuc: - type: file - description: TSV file with trinucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_trinuc_windows.tsv: + type: file + description: TSV file with trinucleotide counts + pattern: "*.{tsv}" - tetranuc: - type: file - description: TSV file with tetranucleotide counts - pattern: "*.{tsv}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fw_out/*_tetranuc_windows.tsv: + type: file + description: TSV file with tetranucleotide counts + pattern: "*.{tsv}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@muffato" maintainers: diff --git a/modules/nf-core/fastawindows/tests/main.nf.test b/modules/nf-core/fastawindows/tests/main.nf.test new file mode 100644 index 00000000..f4bea49f --- /dev/null +++ b/modules/nf-core/fastawindows/tests/main.nf.test @@ -0,0 +1,57 @@ + +nextflow_process { + + name "Test Process FASTAWINDOWS" + script "../main.nf" + process "FASTAWINDOWS" + + tag "modules" + tag "modules_nfcore" + tag "fastawindows" + + test("test-fastawindows") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test-fastawindows-stub") { + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/fastawindows/tests/main.nf.test.snap b/modules/nf-core/fastawindows/tests/main.nf.test.snap new file mode 100644 index 00000000..f3474d3e --- /dev/null +++ b/modules/nf-core/fastawindows/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "test-fastawindows-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ], + "dinuc": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + 
"test_freq_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "mononuc": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tetranuc": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "trinuc": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-30T09:58:57.148076" + }, + "test-fastawindows": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,237d50ac5ec2bef3142020d569fa5765" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,a1b4437d0c71d9cfd676de6bda2633f0" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,696a9f2a4b2114dfbd6b414694f56a11" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,dfb05b758f0474e937e2d6ba6fe46dae" + ] + ], + "4": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,e621537175ee8019360f8b6e8f4330b7" + ] + ], + "5": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ], + "dinuc": [ + [ + { + "id": "test" + }, + "test_dinuc_windows.tsv:md5,696a9f2a4b2114dfbd6b414694f56a11" + ] + ], + "freq": [ + [ + { + "id": "test" + }, + "test_freq_windows.tsv:md5,237d50ac5ec2bef3142020d569fa5765" + ] + ], + "mononuc": [ + [ + { + "id": "test" + }, + "test_mononuc_windows.tsv:md5,a1b4437d0c71d9cfd676de6bda2633f0" + ] + ], + "tetranuc": [ + [ + { + "id": "test" + }, + "test_tetranuc_windows.tsv:md5,e621537175ee8019360f8b6e8f4330b7" + ] + ], + "trinuc": [ + [ + { + "id": "test" + }, + "test_trinuc_windows.tsv:md5,dfb05b758f0474e937e2d6ba6fe46dae" + ] + ], + "versions": [ + "versions.yml:md5,4e46686969aa8883f8092693ebcf7dab" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-08-29T21:22:58.322943" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml index 25910b34..c7794856 100644 --- a/modules/nf-core/gunzip/environment.yml +++ b/modules/nf-core/gunzip/environment.yml @@ -1,7 +1,7 @@ -name: gunzip channels: - conda-forge - bioconda - - defaults dependencies: - - conda-forge::sed=4.7 + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index 468a6f28..5e67e3b9 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" input: tuple val(meta), path(archive) @@ -18,8 +18,11 @@ process GUNZIP { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ # Not calling gunzip itself because it creates files # with the original group ownership rather than the @@ -37,7 +40,11 @@ process GUNZIP { """ stub: - gunzip = archive.toString() - '.gz' + def args = task.ext.args ?: '' + def extension = ( archive.toString() - '.gz' ).tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".$extension" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".$extension" """ touch $gunzip cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml index 231034f2..9066c035 100644 --- a/modules/nf-core/gunzip/meta.yml +++ b/modules/nf-core/gunzip/meta.yml @@ -10,25 +10,32 @@ tools: gzip is a file format and a software application used for file compression and decompression. documentation: https://www.gnu.org/software/gzip/manual/gzip.html licence: ["GPL-3.0-or-later"] + identifier: "" input: - - meta: - type: map - description: | - Optional groovy Map containing meta information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be compressed/uncompressed - pattern: "*.*" + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" output: - gunzip: - type: file - description: Compressed/uncompressed file - pattern: "*.*" + - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - $gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" @@ -37,3 +44,4 @@ maintainers: - "@joseespinosa" - "@drpatelh" - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test index 6406008e..776211ad 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -33,4 +33,89 @@ nextflow_process { } + test("Should run without failures - prefix") { + + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + } diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap index 720fd9ff..069967e7 100644 --- a/modules/nf-core/gunzip/tests/main.nf.test.snap +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -1,4 +1,70 @@ { + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:10.861293" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:35:05.857145" + }, "Should run without failures": { "content": [ { @@ -26,6 +92,43 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, "timestamp": "2023-10-17T15:35:37.690477896" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-25T11:33:32.921739" } } \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml index cf6e775f..dc6476b7 100644 --- a/modules/nf-core/minimap2/align/environment.yml +++ b/modules/nf-core/minimap2/align/environment.yml @@ -1,9 +1,8 @@ -name: minimap2_align channels: - conda-forge - bioconda - - defaults + dependencies: - - bioconda::minimap2=2.24 - - bioconda::samtools=1.18 - - bioconda::htslib=1.18 + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index 7030554d..b0dc23bf 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -4,20 +4,22 @@ process MINIMAP2_ALIGN { // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' : - 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:365b17b986c1a60c1b82c6066a9345f38317b763-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" input: tuple val(meta), path(reads) tuple val(meta2), path(reference) val bam_format + val bam_index_extension val cigar_paf_format val cigar_bam output: - tuple val(meta), path("*.paf"), optional: true, emit: paf - tuple val(meta), path("*.bam"), optional: true, emit: bam - path "versions.yml" , emit: versions + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,16 +27,25 @@ process MINIMAP2_ALIGN { script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def bam_output = bam_format ? 
"-a | samtools sort -@ ${task.cpus} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ + $samtools_reset_fastq \\ minimap2 \\ $args \\ -t $task.cpus \\ - ${reference ?: reads} \\ - $reads \\ + $target \\ + $query \\ $cigar_paf \\ $set_cigar_bam \\ $bam_output @@ -43,14 +54,20 @@ process MINIMAP2_ALIGN { cat <<-END_VERSIONS > versions.yml "${task.process}": minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + """ touch $output_file + ${bam_index} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml index 408522d5..a4cfc891 100644 --- a/modules/nf-core/minimap2/align/meta.yml +++ b/modules/nf-core/minimap2/align/meta.yml @@ -14,62 +14,86 @@ tools: homepage: https://github.com/lh3/minimap2 documentation: https://github.com/lh3/minimap2#uguide licence: ["MIT"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FASTA or FASTQ files of size 1 and 2 for single-end - and paired-end data, respectively. - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test_ref'] - - reference: - type: file - description: | - Reference database in FASTA format. - - bam_format: - type: boolean - description: Specify that output should be in BAM format - - cigar_paf_format: - type: boolean - description: Specify that output CIGAR should be in PAF format - - cigar_bam: - type: boolean - description: | - Write CIGAR with >65535 ops at the CG tag. This is recommended when - doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - - bam_format: + type: boolean + description: Specify that output should be in BAM format + - - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. 
"bai") + - - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - paf: - type: file - description: Alignment in PAF format - pattern: "*.paf" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" - bam: - type: file - description: Alignment in BAM format - pattern: "*.bam" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" maintainers: - "@heuermh" - "@sofstam" - "@sateeshperi" - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff index 479818b3..9dd291e7 100644 --- a/modules/nf-core/minimap2/align/minimap2-align.diff +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -4,7 +4,7 @@ Changes in module 'nf-core/minimap2/align' @@ -1,6 +1,5 @@ process MINIMAP2_ALIGN { tag "$meta.id" -- label 'process_medium' +- label 'process_high' // Note: the versions here need to match the versions used in the mulled container below and minimap2/index conda "${moduleDir}/environment.yml" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test index 4d77e0d9..4072c171 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -9,22 +9,23 @@ nextflow_process { tag "minimap2" tag "minimap2/align" - test("sarscov2 - fastq, fasta, true, false, false") { + test("sarscov2 - fastq, fasta, true, [], false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -33,7 +34,43 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, 
fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -49,17 +86,18 @@ nextflow_process { input[0] = [ [ id:'test', single_end:false ], // meta map [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), - file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -68,7 +106,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -83,15 +122,16 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), ] input[1] = [ [ id:'test_ref' ], // meta map [] ] input[2] = true - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -100,7 +140,8 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), process.out.versions ).match() } ) @@ -108,24 +149,57 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, true, false, false - stub") { + test("sarscov2 - bam, fasta, true, [], false, false") { - options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { when { process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - 
file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = true - input[3] = false + input[3] = 'bai' input[4] = false + input[5] = false """ } } @@ -134,7 +208,9 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - file(process.out.bam[0][1]).name, + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, process.out.versions ).match() } ) @@ -142,7 +218,36 @@ nextflow_process { } - test("sarscov2 - fastq, fasta, false, false, false - stub") { + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { options "-stub" @@ -151,15 +256,80 @@ nextflow_process { """ input[0] = [ [ id:'test', single_end:true ], // meta map - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] input[1] = [ [ id:'test_ref' ], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[2] = false - input[3] = false + input[3] = [] input[4] = false + input[5] = false """ } } @@ -167,13 +337,105 @@ nextflow_process { then { 
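The refreshed minimap2 tests above stop snapshotting BAM outputs by file name and instead assert on reproducible properties. A minimal sketch of the idiom, assuming the `nft-bam` nf-test plugin is enabled so that the `bam()` helper used in these assertions is available:

```groovy
// Sketch under the assumption that the nft-bam plugin provides bam():
// snapshot the SAM header plus an MD5 over the read records, which are
// stable across runs, rather than hashing the whole BGZF-compressed file.
assertAll(
    { assert process.success },
    { assert snapshot(
        bam(process.out.bam[0][1]).getHeader(),   // @HD/@SQ/@PG header lines
        bam(process.out.bam[0][1]).getReadsMD5(), // digest of read records only
        process.out.versions
    ).match() }
)
```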
assertAll( { assert process.success }, - { assert snapshot( - file(process.out.paf[0][1]).name, - process.out.versions - ).match() } + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap index ec99d13e..12264a85 100644 --- a/modules/nf-core/minimap2/align/tests/main.nf.test.snap +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -1,67 +1,476 @@ { - "sarscov2 - fastq, fasta, true, false, false": { + "sarscov2 - bam, fasta, true, 'bai', false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:06.01315354" + "timestamp": "2024-07-23T11:18:18.964586894" }, - "sarscov2 - fastq, fasta, true, false, false - stub": { + "sarscov2 - fastq, fasta, true, [], false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + 
"@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:24.487175659" + "timestamp": "2024-07-23T11:17:48.667488325" }, - "sarscov2 - fastq, fasta, false, false, false - stub": { + "sarscov2 - fastq, fasta, true, 'bai', false, false": { "content": [ - "test.paf", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2024-03-01T11:06:54.090105" + "timestamp": "2024-07-23T11:18:02.517416733" }, - "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "sarscov2 - bam, fasta, true, [], false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": "23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:12.50816279" + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" }, "sarscov2 - fastq, [], true, false, false": { "content": [ - "test.bam", [ - "versions.yml:md5,9e9eeae0002d466d580a9d6e0d003eb1" + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + 
"@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" ] ], "meta": { "nf-test": "0.8.4", - "nextflow": 
"23.10.0" + "nextflow": "24.04.2" }, - "timestamp": "2023-12-04T12:07:18.414974788" + "timestamp": "2024-07-23T11:18:34.246998277" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ca39fb67..a27122ce 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.21 + - bioconda::multiqc=1.27 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 47ac352f..58d9313c 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,14 +3,16 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.21--pyhdfd78af_0' : - 'biocontainers/multiqc:1.21--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.27--pyhdfd78af_0' : + 'biocontainers/multiqc:1.27--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -23,16 +25,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -44,7 +52,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc35..b16c1879 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,40 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
- pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242e..33316a7d 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index bfebd802..7b7c1322 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-02-29T08:48:55.657331" + "timestamp": "2025-01-27T09:29:57.631982377" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" ] 
], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-02-29T08:49:49.071937" + "timestamp": "2025-01-27T09:30:34.743726958" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,21f35ee29416b9b3073c28733efe4b7d" + "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" + "nf-test": "0.9.2", + "nextflow": "24.10.4" }, - "timestamp": "2024-02-29T08:49:25.457567" + "timestamp": "2025-01-27T09:30:21.44383553" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 00000000..c537a6a3 --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/pigz/compress/environment.yml b/modules/nf-core/pigz/compress/environment.yml index 7551d187..5016d226 100644 --- a/modules/nf-core/pigz/compress/environment.yml +++ b/modules/nf-core/pigz/compress/environment.yml @@ -1,9 +1,7 @@ --- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json -name: "pigz_compress" channels: - conda-forge - bioconda - - defaults dependencies: - "pigz=2.8" diff --git a/modules/nf-core/pigz/compress/meta.yml b/modules/nf-core/pigz/compress/meta.yml index 42efd735..0966e651 100644 --- a/modules/nf-core/pigz/compress/meta.yml +++ b/modules/nf-core/pigz/compress/meta.yml @@ -1,4 +1,3 @@ ---- # yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "pigz_compress" description: Compresses files with pigz. @@ -12,35 +11,33 @@ tools: homepage: "https://zlib.net/pigz/" documentation: "https://zlib.net/pigz/pigz.pdf" + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - - raw_file: - type: file - description: File to be compressed - pattern: "*.*" - + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - raw_file: + type: file + description: File to be compressed + pattern: "*.*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - archive: - type: file - description: The compressed file - pattern: "*.gz" - + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - $archive: + type: file + description: The compressed file + pattern: "*.gz" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@leoisl" maintainers: diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test b/modules/nf-core/pigz/compress/tests/main.nf.test index 248d40fb..b3cb25e3 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test +++ b/modules/nf-core/pigz/compress/tests/main.nf.test @@ -14,7 +14,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -34,7 +34,7 @@ nextflow_process { """ input[0] = [ [ id:'test'], // meta map - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } @@ -42,7 +42,11 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(file(process.out.archive[0][1]).name).match() } + { assert snapshot( + file(process.out.archive[0][1]).name, + process.out.versions + ).match() + } ) } } diff --git a/modules/nf-core/pigz/compress/tests/main.nf.test.snap b/modules/nf-core/pigz/compress/tests/main.nf.test.snap index 6e50456f..4d8df9f1 100644 --- a/modules/nf-core/pigz/compress/tests/main.nf.test.snap +++ b/modules/nf-core/pigz/compress/tests/main.nf.test.snap @@ -26,12 +26,23 @@ ] } ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, "timestamp": "2023-12-11T22:39:53.350546" }, "sarscov2 - genome - fasta - stub": { "content": [ - "genome.fasta.gz" + "genome.fasta.gz", + [ + "versions.yml:md5,ca30e9e1ffa1394ba7eefdac8cf3a3ad" + ] ], - "timestamp": "2023-12-11T22:52:24.309192" + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-30T12:18:32.339508" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/fasta/environment.yml b/modules/nf-core/samtools/fasta/environment.yml deleted file mode 100644 index 14585013..00000000 --- a/modules/nf-core/samtools/fasta/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: samtools_fasta -channels: - - conda-forge - - bioconda - - defaults -dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/fasta/main.nf b/modules/nf-core/samtools/fasta/main.nf deleted file mode 100644 index 9aa03430..00000000 --- a/modules/nf-core/samtools/fasta/main.nf +++ /dev/null @@ -1,43 +0,0 @@ -process SAMTOOLS_FASTA { - tag "$meta.id" - label 'process_low' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" - - input: - tuple val(meta), path(input) - val(interleave) - - output: - tuple val(meta), path("*_{1,2}.fasta.gz") , optional:true, emit: fasta - tuple val(meta), path("*_interleaved.fasta.gz"), optional:true, emit: interleaved - tuple val(meta), path("*_singleton.fasta.gz") , optional:true, emit: singleton - tuple val(meta), path("*_other.fasta.gz") , optional:true, emit: other - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def output = ( interleave && ! meta.single_end ) ? "| gzip > ${prefix}_interleaved.fasta.gz" : - meta.single_end ? "-1 ${prefix}_1.fasta.gz -s ${prefix}_singleton.fasta.gz" : - "-1 ${prefix}_1.fasta.gz -2 ${prefix}_2.fasta.gz -s ${prefix}_singleton.fasta.gz" - """ - samtools \\ - fasta \\ - $args \\ - --threads ${task.cpus-1} \\ - $input \\ - $output - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/samtools/fasta/meta.yml b/modules/nf-core/samtools/fasta/meta.yml deleted file mode 100644 index eae26f01..00000000 --- a/modules/nf-core/samtools/fasta/meta.yml +++ /dev/null @@ -1,60 +0,0 @@ -name: "samtools_fasta" -description: Converts a SAM/BAM/CRAM file to FASTA -keywords: - - bam - - sam - - cram - - fasta -tools: - - "samtools": - description: "Tools for dealing with SAM, BAM and CRAM files" - homepage: "http://www.htslib.org" - documentation: "https://www.htslib.org/doc/samtools-fasta.html" - tool_dev_url: "https://github.com/samtools/samtools" - doi: "10.1093/bioinformatics/btp352" - licence: ["MIT"] -input: - # Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - interleave: - type: boolean - description: Set true for interleaved fasta files -output: - #Only when we have meta - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - - fasta: - type: file - description: Compressed FASTA file(s) with reads with either the READ1 or READ2 flag set in separate files. - pattern: "*_{1,2}.fasta.gz" - - interleaved: - type: file - description: Compressed FASTA file with reads with either the READ1 or READ2 flag set in a combined file. Needs collated input file. 
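The module removed above selected its `samtools fasta` output flags from `interleave` and `meta.single_end` in a nested ternary. Restated as a plain Groovy helper for readers of this removal (illustrative only, mirroring the deleted logic):

```groovy
// Restatement of the deleted output-selection ternary (illustrative).
String fastaOutputArgs(boolean interleave, boolean singleEnd, String prefix) {
    if (interleave && !singleEnd)       // collated paired reads -> one interleaved file
        return "| gzip > ${prefix}_interleaved.fasta.gz"
    if (singleEnd)                      // single-end -> one read file plus singletons
        return "-1 ${prefix}_1.fasta.gz -s ${prefix}_singleton.fasta.gz"
    // paired-end, not interleaved -> R1, R2 and singleton files
    return "-1 ${prefix}_1.fasta.gz -2 ${prefix}_2.fasta.gz -s ${prefix}_singleton.fasta.gz"
}
```

Interleaving therefore only applied to collated paired-end input; single-end data never produced a `_2` file.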
- pattern: "*_interleaved.fasta.gz" - - singleton: - type: file - description: Compressed FASTA file with singleton reads - pattern: "*_singleton.fasta.gz" - - other: - type: file - description: Compressed FASTA file with reads with either both READ1 and READ2 flags set or unset - pattern: "*_other.fasta.gz" -authors: - - "@priyanka-surana" -maintainers: - - "@priyanka-surana" diff --git a/modules/nf-core/samtools/fasta/samtools-fasta.diff b/modules/nf-core/samtools/fasta/samtools-fasta.diff deleted file mode 100644 index e2374ed9..00000000 --- a/modules/nf-core/samtools/fasta/samtools-fasta.diff +++ /dev/null @@ -1,13 +0,0 @@ -Changes in module 'nf-core/samtools/fasta' ---- modules/nf-core/samtools/fasta/main.nf -+++ modules/nf-core/samtools/fasta/main.nf -@@ -32,7 +32,6 @@ - fasta \\ - $args \\ - --threads ${task.cpus-1} \\ -- -0 ${prefix}_other.fasta.gz \\ - $input \\ - $output - - -************************************************************ diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml index bd57cb54..62054fc9 100644 --- a/modules/nf-core/samtools/flagstat/environment.yml +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -1,8 +1,8 @@ -name: samtools_flagstat +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf index eb5f5252..c23f3a5c 100644 --- a/modules/nf-core/samtools/flagstat/main.nf +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_FLAGSTAT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(bam), path(bai) @@ -18,7 +18,6 @@ process SAMTOOLS_FLAGSTAT { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml index 97991358..cdc4c254 100644 --- a/modules/nf-core/samtools/flagstat/meta.yml +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -1,5 +1,6 @@ name: samtools_flagstat -description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG + type keywords: - stats - mapping @@ -17,34 +18,37 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - bai: - type: file - description: Index for BAM/CRAM/SAM file - pattern: "*.{bai,crai,sai}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - flagstat: - type: file - description: File containing samtools flagstat output - pattern: "*.{flagstat}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.flagstat": + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" maintainers: diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test index 24c3c04b..3b648a37 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -11,9 +11,30 @@ nextflow_process { test("BAM") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { process { """ input[0] = Channel.of([ @@ -28,8 +49,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.flagstat).match("flagstat") }, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap index a76fc27e..04c3852b 100644 --- a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -1,32 +1,72 @@ { - "flagstat": { + "BAM - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:31:37.783927" + "timestamp": "2024-09-16T08:02:58.866491759" }, - "versions": { + "BAM": { "content": [ - [ - "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" - ] + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + 
}, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,108a155f2d4a99f50bf3176904208d27" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:11:44.299617452" + "timestamp": "2024-09-16T08:02:47.383332837" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml index a5e50649..62054fc9 100644 --- a/modules/nf-core/samtools/index/environment.yml +++ b/modules/nf-core/samtools/index/environment.yml @@ -1,8 +1,8 @@ -name: samtools_index +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + - bioconda::htslib=1.21 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf index dc14f98d..31175610 100644 --- a/modules/nf-core/samtools/index/main.nf +++ b/modules/nf-core/samtools/index/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_INDEX { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input) @@ -35,10 +35,11 @@ process SAMTOOLS_INDEX { """ stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? "csi" : "bai" """ - touch ${input}.bai - touch ${input}.crai - touch ${input}.csi + touch ${input}.${extension} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml index 01a4ee03..db8df0d5 100644 --- a/modules/nf-core/samtools/index/meta.yml +++ b/modules/nf-core/samtools/index/meta.yml @@ -15,38 +15,52 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - bam: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" - - crai: - type: file - description: BAM/CRAM/SAM index file - pattern: "*.{bai,crai,sai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - csi: - type: file - description: CSI index file - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + - crai: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@ewels" diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test index bb7756d1..ca34fb5c 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -9,11 +9,7 @@ nextflow_process { tag "samtools/index" test("bai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -27,18 +23,13 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.bai).match("bai") }, - { assert snapshot(process.out.versions).match("bai_versions") } + { assert snapshot(process.out).match() } ) } } test("crai") { - when { - params { - outdir = "$outputDir" - } process { """ input[0] = Channel.of([ @@ -52,20 +43,83 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert snapshot(process.out.crai).match("crai") }, - { assert snapshot(process.out.versions).match("crai_versions") } + { assert snapshot(process.out).match() } ) } } test("csi") { - config "./csi.nextflow.config" when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { process { """ input[0] = Channel.of([ @@ -79,8 +133,7 @@ nextflow_process { then { assertAll ( { assert process.success }, - { assert path(process.out.csi.get(0).get(1)).exists() }, - { assert snapshot(process.out.versions).match("csi_versions") } + { assert snapshot(process.out).match() } ) } } diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap index 3dc8e7de..72d65e81 100644 --- a/modules/nf-core/samtools/index/tests/main.nf.test.snap +++ 
b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -1,74 +1,250 @@ { - "crai_versions": { + "csi - stub": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:00.324667957" + "timestamp": "2024-09-16T08:21:25.261127166" }, - "csi_versions": { + "crai - stub": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:07.885103162" + "timestamp": "2024-09-16T08:21:12.653194876" }, - "crai": { + "bai - stub": { "content": [ - [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] - ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:41:38.446424" + "timestamp": "2024-09-16T08:21:01.854932651" }, - "bai": { + "csi": { "content": [ + "test.paired_end.sorted.bam.csi", [ - [ - { - "id": "test", - "single_end": false - }, - "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" - ] + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "23.04.3" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-12T18:40:46.579747" + "timestamp": "2024-09-16T08:20:51.485364222" }, - "bai_versions": { + "crai": { "content": [ - [ - "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" - ] + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T08:20:40.518873972" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,5e09a6fdf76de396728f877193d72315" + ] + } ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:11:51.641425452" + "timestamp": "2024-09-16T08:20:21.184050361" } } \ No newline at end of file diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml index b0676f33..02cda6e6 100644 --- a/modules/nf-core/samtools/view/environment.yml +++ b/modules/nf-core/samtools/view/environment.yml @@ -1,8 +1,10 @@ -name: samtools_view +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::samtools=1.19.2 - - bioconda::htslib=1.19.1 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf index 5a8989d6..4f69a94b 100644 --- a/modules/nf-core/samtools/view/main.nf +++ b/modules/nf-core/samtools/view/main.nf @@ -4,8 +4,8 @@ process SAMTOOLS_VIEW { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : - 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : + 'biocontainers/samtools:1.21--h50ea8bc_0' }" input: tuple val(meta), path(input), path(index) @@ -13,13 +13,15 @@ path qname output: - tuple val(meta), path("*.bam"), emit: bam, optional: true - tuple val(meta), path("*.cram"), emit: cram, optional: true - tuple val(meta), path("*.sam"), emit: sam, optional: true - tuple val(meta), path("*.bai"), emit: bai, optional: true - tuple val(meta), path("*.csi"), emit: csi, optional: true - tuple val(meta), path("*.crai"), emit: crai, optional: true - path "versions.yml", emit: versions + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${file_type}.bai"), emit: bai, optional: true + tuple val(meta), path("${prefix}.${file_type}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${file_type}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}"), emit: unselected, optional: true + tuple val(meta), path("${prefix}.unselected.${file_type}.{bai,csi,crai}"), emit: unselected_index, optional: true + path "versions.yml", emit: versions when: task.ext.when == null || task.ext.when @@ -27,13 +29,13 @@ script: def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? "--reference ${fasta}" : "" - def readnames = qname ? "--qname-file ${qname}": "" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + readnames = qname ? "--qname-file ${qname} --output-unselected ${prefix}.unselected.${file_type}": "" if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" """ samtools \\ @@ -54,14 +56,14 @@ stub: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def file_type = args.contains("--output-fmt sam") ? "sam" : - args.contains("--output-fmt bam") ? "bam" : - args.contains("--output-fmt cram") ? "cram" : - input.getExtension() + prefix = task.ext.prefix ?: "${meta.id}" + file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - def index = args.contains("--write-index") ? 
"touch ${prefix}.${file_type}.csi" : "" """ touch ${prefix}.${file_type} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml index 3dadafae..caa7b015 100644 --- a/modules/nf-core/samtools/view/meta.yml +++ b/modules/nf-core/samtools/view/meta.yml @@ -15,68 +15,120 @@ tools: documentation: http://www.htslib.org/doc/samtools.html doi: 10.1093/bioinformatics/btp352 licence: ["MIT"] + identifier: biotools:samtools input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - index: - type: file - description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) - pattern: "*.{.bai,.csi,.crai}" - - meta2: - type: map - description: | - Groovy Map containing reference information - e.g. [ id:'test' ] - - fasta: - type: file - description: Reference file the CRAM was created with (optional) - pattern: "*.{fasta,fa}" - - qname: - type: file - description: Optional file with read names to output only select alignments - pattern: "*.{txt,list}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - bam: - type: file - description: optional filtered/converted BAM file - pattern: "*.{bam}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" - cram: - type: file - description: optional filtered/converted CRAM file - pattern: "*.{cram}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" - sam: - type: file - description: optional filtered/converted SAM file - pattern: "*.{sam}" - # bai, csi, and crai are created with `--write-index` + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" - bai: - type: file - description: optional BAM file index - pattern: "*.{bai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" - csi: - type: file - description: optional tabix BAM file index - pattern: "*.{csi}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.${file_type}.csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" - crai: - type: file - description: optional CRAM file index - pattern: "*.{crai}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.${file_type}.crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - unselected: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}: + type: file + description: optional file with unselected alignments + pattern: "*.unselected.{bam,cram,sam}" + - unselected_index: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.unselected.${file_type}.{bai,csi,crai}: + type: file + description: index for the "unselected" file + pattern: "*.unselected.{bai,csi,crai}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@joseespinosa" diff --git a/modules/nf-core/samtools/view/samtools-view.diff b/modules/nf-core/samtools/view/samtools-view.diff new file mode 100644 index 00000000..fb47ba9a --- /dev/null +++ b/modules/nf-core/samtools/view/samtools-view.diff @@ -0,0 +1,16 @@ +Changes in module 'nf-core/samtools/view' +--- modules/nf-core/samtools/view/main.nf ++++ modules/nf-core/samtools/view/main.nf +@@ -4,8 +4,8 @@ + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/9e/9edc2564215d5cd137a8b25ca8a311600987186d406b092022444adf3c4447f7/data' : +- 'community.wave.seqera.io/library/htslib_samtools:1.21--6cb89bfd40cbaabf' }" ++ 'https://depot.galaxyproject.org/singularity/samtools:1.21--h50ea8bc_0' : ++ 'biocontainers/samtools:1.21--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + +************************************************************ diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test index 45a0defb..37b81a91 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -172,6 +172,8 @@ nextflow_process { { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(file(process.out.unselected[0][1]).name).match("cram_to_bam_index_qname_unselected") }, + { assert snapshot(file(process.out.unselected_index[0][1]).name).match("cram_to_bam_index_qname_unselected_csi") }, { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } ) } diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap index f55943a7..63849b03 100644 --- a/modules/nf-core/samtools/view/tests/main.nf.test.snap +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -56,14 +56,14 @@ "bam_stub_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:13:09.713353823" + "timestamp": "2024-09-16T09:26:24.461775464" }, "cram_to_bam_index_cram": { "content": [ @@ -169,6 +169,16 @@ }, "timestamp": "2024-02-12T19:37:56.490286" }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, "bam_csi": { "content": [ [ @@ -208,14 +218,14 @@ "cram_to_bam_index_qname_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:13:03.935041046" + "timestamp": "2024-09-16T09:25:51.953436682" }, "cram_to_bam_bam": { "content": [ @@ -240,14 +250,14 @@ "cram_to_bam_index_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:55.910685496" + "timestamp": "2024-09-16T09:25:14.475388399" }, "cram_to_bam_bai": { "content": [ @@ -264,14 +274,14 @@ "cram_to_bam_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:47.715221169" + "timestamp": "2024-09-16T09:24:49.673441798" }, "cram_bam": { "content": [ @@ 
-355,17 +365,37 @@ }, "timestamp": "2024-02-12T19:38:23.322874" }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "cram_to_bam_index_qname_unselected_csi": { + "content": [ + "test.unselected.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, "bam_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-02-13T16:12:31.692607421" + "timestamp": "2024-09-16T09:23:27.151650338" }, "cram_to_bam_index_qname_cram": { "content": [ @@ -430,14 +460,24 @@ "cram_versions": { "content": [ [ - "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + "versions.yml:md5,176db5ec46b965219604bcdbb3ef9e07" ] ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-16T09:24:12.95416913" + }, + "cram_to_bam_index_qname_unselected": { + "content": [ + "test.unselected.bam" + ], "meta": { "nf-test": "0.8.4", - "nextflow": "24.01.0" + "nextflow": "23.04.3" }, - "timestamp": "2024-02-13T16:12:39.913411036" + "timestamp": "2024-02-12T19:38:23.322874" }, "bam_sam": { "content": [ @@ -477,7 +517,7 @@ }, "bam_stub_csi": { "content": [ - "test.csi" + "test.bam.csi" ], "meta": { "nf-test": "0.8.4", diff --git a/modules/nf-core/seqtk/subseq/environment.yml b/modules/nf-core/seqtk/subseq/environment.yml index 7abe3644..693aa5c1 100644 --- a/modules/nf-core/seqtk/subseq/environment.yml +++ b/modules/nf-core/seqtk/subseq/environment.yml @@ -1,7 +1,5 @@ -name: seqtk_subseq channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::seqtk=1.4 diff --git a/modules/nf-core/seqtk/subseq/meta.yml b/modules/nf-core/seqtk/subseq/meta.yml index 4e8ee19f..2667f40d 100644 --- a/modules/nf-core/seqtk/subseq/meta.yml +++ b/modules/nf-core/seqtk/subseq/meta.yml @@ -6,29 +6,42 @@ keywords: - fastx tools: - seqtk: - description: Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format + description: Seqtk is a fast and lightweight tool for processing sequences in + the FASTA or FASTQ format homepage: https://github.com/lh3/seqtk documentation: https://docs.csc.fi/apps/seqtk/ tool_dev_url: https://github.com/lh3/seqtk licence: ["MIT"] + identifier: biotools:seqtk input: - - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq,fq.gz,fa,fa.gz}" - - filter_list: - type: file - description: BED file or a text file with a list of sequence names - pattern: "*.{bed,lst}" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - sequences: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq,fq.gz,fa,fa.gz}" + - - filter_list: + type: file + description: BED file or a text file with a list of sequence names + pattern: "*.{bed,lst}" output: - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - sequences: - type: file - description: FASTQ/FASTA file - pattern: "*.{fq.gz,fa.gz}" + - meta: + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" + - "*.gz": + type: file + description: FASTQ/FASTA file + pattern: "*.{fq.gz,fa.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@sidorov-si" maintainers: diff --git a/modules/nf-core/seqtk/subseq/tests/main.nf.test b/modules/nf-core/seqtk/subseq/tests/main.nf.test index be5602e3..fa8fad69 100644 --- a/modules/nf-core/seqtk/subseq/tests/main.nf.test +++ b/modules/nf-core/seqtk/subseq/tests/main.nf.test @@ -17,9 +17,9 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) """ } } @@ -40,9 +40,9 @@ nextflow_process { """ input[0] = [ [ id:'test' ], - file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] - input[1] = file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed.gz', checkIfExists: true) """ } } diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..c7794856 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,7 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..9bd8f554 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,84 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:22.04' : + 'nf-core/ubuntu:22.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir ${prefix} + ## Dry-run untaring the archive to get the files and place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch \${i} + else + mkdir -p \${i} + fi + done + else + for i in `tar -tf ${archive}`; + do + if [[ \$(echo "\${i}" | grep -E "/\$") == "" ]]; + then + touch ${prefix}/\${i} + else + mkdir -p ${prefix}/\${i} + fi + done + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..290346b3 --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,49 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - untar: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - $prefix: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..c957517a --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,85 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } + + test("test_untar_onlyfiles - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + } +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..ceb91b79 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,158 @@ +{ + "test_untar_onlyfiles": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:28.231047" + }, + "test_untar_onlyfiles - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hello.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:45.773103" + }, + "test_untar - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + 
"versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "opts.k2d:md5,d41d8cd98f00b204e9800998ecf8427e", + "taxo.k2d:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:36.777441" + }, + "test_untar": { + "content": [ + { + "0": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "1": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ], + "untar": [ + [ + [ + + ], + [ + "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ], + "versions": [ + "versions.yml:md5,6063247258c56fd271d076bb04dd7536" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-10T12:04:19.377674" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/windowmasker/mkcounts/environment.yml b/modules/nf-core/windowmasker/mkcounts/environment.yml index e4d72108..777e097e 100644 --- a/modules/nf-core/windowmasker/mkcounts/environment.yml +++ b/modules/nf-core/windowmasker/mkcounts/environment.yml @@ -1,7 +1,5 @@ -name: windowmasker_mkcounts channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/mkcounts/meta.yml b/modules/nf-core/windowmasker/mkcounts/meta.yml index 436ed7a5..825a0674 100644 --- a/modules/nf-core/windowmasker/mkcounts/meta.yml +++ b/modules/nf-core/windowmasker/mkcounts/meta.yml @@ -11,31 +11,32 @@ tools: homepage: https://github.com/ncbi/ncbi-cxx-toolkit-public documentation: https://ncbi.github.io/cxx-toolkit/ licence: ["MIT"] + identifier: biotools:windowmasker input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ref: - type: file - description: An input nucleotide fasta file. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ref: + type: file + description: An input nucleotide fasta file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: | - An output file containing genomic locations of low - complexity and highly repetitive regions - pattern: "${prefix}.txt" + - counts: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: A file containing frequency counts of repetitive units. 
+ pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test index 18c4977c..bf53d7fa 100644 --- a/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test +++ b/modules/nf-core/windowmasker/mkcounts/tests/main.nf.test @@ -20,7 +20,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -43,7 +43,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } diff --git a/modules/nf-core/windowmasker/ustat/environment.yml b/modules/nf-core/windowmasker/ustat/environment.yml index b83d82e5..777e097e 100644 --- a/modules/nf-core/windowmasker/ustat/environment.yml +++ b/modules/nf-core/windowmasker/ustat/environment.yml @@ -1,7 +1,5 @@ -name: windowmasker_ustat channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::blast=2.15.0 diff --git a/modules/nf-core/windowmasker/ustat/meta.yml b/modules/nf-core/windowmasker/ustat/meta.yml index 6a07c935..bc51a934 100644 --- a/modules/nf-core/windowmasker/ustat/meta.yml +++ b/modules/nf-core/windowmasker/ustat/meta.yml @@ -1,5 +1,6 @@ name: windowmasker_ustat -description: A program to take a counts file and creates a file of genomic co-ordinates to be masked. +description: A program to take a counts file and creates a file of genomic co-ordinates + to be masked. keywords: - fasta - interval @@ -11,39 +12,39 @@ tools: homepage: https://github.com/ncbi/ncbi-cxx-toolkit-public documentation: https://ncbi.github.io/cxx-toolkit/ licence: ["MIT"] + identifier: biotools:windowmasker input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test' ] - - counts: - type: file - description: Contains count data of repetitive regions. - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - ref: - type: file - description: An input nucleotide fasta file. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - counts: + type: file + description: Contains count data of repetitive regions. + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ref: + type: file + description: An input nucleotide fasta file. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - wm_intervals: - type: file - description: | - An output file containing genomic locations of low - complexity and highly repetitive regions - pattern: "${output}" + - intervals: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${output}: + type: file + description: intervals - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@DLBPointon" maintainers: diff --git a/modules/nf-core/windowmasker/ustat/tests/main.nf.test b/modules/nf-core/windowmasker/ustat/tests/main.nf.test index 58d91b13..6e02c9c1 100644 --- a/modules/nf-core/windowmasker/ustat/tests/main.nf.test +++ b/modules/nf-core/windowmasker/ustat/tests/main.nf.test @@ -19,7 +19,7 @@ nextflow_process { """ input[0] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -33,7 +33,7 @@ nextflow_process { input[0] = WINDOWMASKER_MKCOUNTS.out.counts input[1] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } @@ -51,7 +51,7 @@ nextflow_process { input[0] = WINDOWMASKER_MKCOUNTS.out.counts input[1] = [ [id: "test" ], - [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] ] """ } diff --git a/nextflow.config b/nextflow.config index 18994df9..1157416f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -16,6 +16,7 @@ params { mask = false fetchngs_samplesheet = false busco_lineages = null + precomputed_busco = null // Reference options fasta = null @@ -54,7 +55,10 @@ params { monochrome_logs = false hook_url = null help = false + help_full = false + show_hidden = false version = false + trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options // Config options config_profile_name = null @@ -64,147 +68,150 @@ params { config_profile_contact = null config_profile_url = null - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '128.GB' - max_cpus = 16 - max_time = '240.h' - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationSchemaIgnoreParams = 'genomes,igenomes_base' - validationShowHiddenParams = false - validate_params = true - + validate_params = true } // Load base.config by default for all pipelines includeConfig 'conf/base.config' -// Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} - -// Load sanger-tol/blobtoolkit custom profiles from different institutions. -// Warning: Uncomment only if a pipeline-specific instititutional config already exists on nf-core/configs! 
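The commented-out `try`/`catch` include below is removed because the template now guards remote config loading with an offline-aware conditional that falls back to `/dev/null`, as added later in this same file. The replacement pattern in isolation (taken verbatim from the new `nextflow.config`, with explanatory comments added):

```nextflow
// Skip fetching remote institutional configs when NXF_OFFLINE is set or no
// custom_config_base is given; including /dev/null is a harmless no-op.
includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
```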
-// try { -// includeConfig "${params.custom_config_base}/pipeline/blobtoolkit.config" -// } catch (Exception e) { -// System.err.println("WARNING: Could not load nf-core/config/blobtoolkit profiles: ${params.custom_config_base}/pipeline/blobtoolkit.config") -// } profiles { debug { - dumpHashes = true - process.beforeScript = 'echo $HOSTNAME' - cleanup = false + dumpHashes = true + process.beforeScript = 'echo $HOSTNAME' + cleanup = false nextflow.enable.configProcessNamesValidation = true } conda { - conda.enabled = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + conda.channels = ['conda-forge', 'bioconda'] + apptainer.enabled = false } mamba { - conda.enabled = true - conda.useMamba = true - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + conda.enabled = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } docker { - docker.enabled = true - conda.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false - docker.runOptions = '-u $(id -u):$(id -g)' + docker.enabled = true + conda.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false + docker.runOptions = '-u $(id -u):$(id -g)' } arm { - docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' + docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64' } singularity { - singularity.enabled = true - singularity.autoMounts = true - conda.enabled = false - docker.enabled = false - podman.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + singularity.enabled = true + singularity.autoMounts = true + conda.enabled = false + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } podman { - podman.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - shifter.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + podman.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + shifter.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } shifter { - shifter.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - charliecloud.enabled = false - apptainer.enabled = false + shifter.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + apptainer.enabled = false } charliecloud { - charliecloud.enabled = true - conda.enabled = false - docker.enabled = false - singularity.enabled = false - podman.enabled = false - shifter.enabled = false - apptainer.enabled = false + charliecloud.enabled = true + conda.enabled = false + docker.enabled = false + singularity.enabled = false + podman.enabled = false 
+        shifter.enabled        = false
+        apptainer.enabled      = false
     }
     apptainer {
-        apptainer.enabled      = true
-        apptainer.autoMounts   = true
-        conda.enabled          = false
-        docker.enabled         = false
-        singularity.enabled    = false
-        podman.enabled         = false
-        shifter.enabled        = false
-        charliecloud.enabled   = false
+        apptainer.enabled      = true
+        apptainer.autoMounts   = true
+        conda.enabled          = false
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    wave {
+        apptainer.ociAutoPull   = true
+        singularity.ociAutoPull = true
+        wave.enabled            = true
+        wave.freeze             = true
+        wave.strategy           = 'conda,container'
     }
     gitpod {
-        executor.name   = 'local'
-        executor.cpus   = 4
-        executor.memory = 8.GB
+        executor.name          = 'local'
+        executor.cpus          = 4
+        executor.memory        = 8.GB
+        process {
+            resourceLimits = [
+                memory: 8.GB,
+                cpus  : 4,
+                time  : 1.h
+            ]
+        }
     }
     cleanup { cleanup = true }
     test      { includeConfig 'conf/test.config'      }
     test_raw  { includeConfig 'conf/test_raw.config'  }
     test_full { includeConfig 'conf/test_full.config' }
+    test_nobusco { includeConfig 'conf/test_nobusco.config' }
 }

-// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
-// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
+// Load nf-core custom profiles from different Institutions
+includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null"
+
+// Load sanger-tol/blobtoolkit custom profiles from different institutions.
+// Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs
+// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/blobtoolkit.config" : "/dev/null"
+
+// Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile
+// Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled
 // Set to your registry if you have a mirror of containers
-apptainer.registry = 'quay.io'
-docker.registry = 'quay.io'
-podman.registry = 'quay.io'
-singularity.registry = 'quay.io'
+apptainer.registry    = 'quay.io'
+docker.registry       = 'quay.io'
+podman.registry       = 'quay.io'
+singularity.registry  = 'quay.io'
+charliecloud.registry = 'quay.io'
+

-// Nextflow plugins
-plugins {
-    id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet
-}

 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
@@ -217,75 +224,149 @@ env {
     JULIA_DEPOT_PATH = "/usr/local/share/julia"
 }

-// Capture exit codes from upstream processes when piping
-process.shell = ['/bin/bash', '-euo', 'pipefail']
+// Set bash options
+process.shell = [
+    "bash",
+    "-C",         // No clobber - prevent output redirection from overwriting files.
+    "-e",         // Exit if a tool returns a non-zero status/exit code
+    "-u",         // Treat unset variables and parameters as an error
+    "-o",         // Returns the status of the last command to exit..
+ "pipefail" // ..with a non-zero status or zero if all successfully execute +] // Disable process selector warnings by default. Use debug profile to enable warnings. nextflow.enable.configProcessNamesValidation = false -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.outdir}/pipeline_info/blobtoolkit/execution_timeline_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/blobtoolkit/execution_timeline_${params.trace_report_suffix}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/blobtoolkit/execution_report_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/blobtoolkit/execution_report_${params.trace_report_suffix}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/blobtoolkit/execution_trace_${trace_timestamp}.txt" + file = "${params.outdir}/pipeline_info/blobtoolkit/execution_trace_${params.trace_report_suffix}.txt" fields = 'task_id,hash,native_id,process,tag,status,exit,cpus,memory,time,attempt,submit,start,complete,duration,%cpu,%mem,peak_rss,rchar,wchar' } dag { enabled = true - file = "${params.outdir}/pipeline_info/blobtoolkit/pipeline_dag_${trace_timestamp}.html" + file = "${params.outdir}/pipeline_info/blobtoolkit/pipeline_dag_${params.trace_report_suffix}.html" } manifest { name = 'sanger-tol/blobtoolkit' - author = """@zb32, @rjchallis, @alxndrdiaz, @sujaikumar, @muffato, @gq1, @priyanka-surana""" + author = """Muffato, Matthieu""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead + contributors = [ + [ + name: 'Butt, Zaynab', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/zb32', + contribution: ['contributor'], + orcid: 'https://orcid.org/0009-0009-7934-8440' + ], + [ + name: 'Chafin, Tyler', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/tkchafin', + contribution: ['contributor'], + orcid: 'https://orcid.org/0000-0001-8687-5905' + ], + [ + name: 'Challis, Rich', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/rjchallis', + contribution: ['author'], + orcid: 'https://orcid.org/0000-0002-3502-112' + ], + [ + name: 'Kumar, Sujai', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/sujaikumar', + contribution: ['author'], + orcid: 'https://orcid.org/0000-0001-5902-6641' + ], + [ + name: 'Muffato, Matthieu', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/muffato', + contribution: ['author', 'maintainer'], + orcid: 'https://orcid.org/0000-0002-7860-3560' + ], + [ + name: 'Qi, Guoying', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/gq1', + contribution: ['contributor'], + orcid: 'https://orcid.org/0000-0003-1262-8973' + ], + [ + name: 'Ramos Díaz, Alexander', + github: 'https://github.com/alxndrdiaz', + contribution: ['author'], + orcid: 'https://orcid.org/0000-0001-6410-3349' + ], + [ + name: 'Surana, Priyanka', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/priyanka-surana', + contribution: ['author'], + orcid: 'https://orcid.org/0000-0002-7167-0875' + ], + [ + name: 'Yates, Bethan', + affiliation: 'Wellcome Sanger Institute', + github: 'https://github.com/BethYates', + contribution: ['contributor'], + orcid: 'https://orcid.org/0000-0003-1658-1762' + ], + ] homePage = 'https://github.com/sanger-tol/blobtoolkit' description = """Quality assessment of genome assemblies""" mainScript = 'main.nf' - nextflowVersion = 
'!>=23.04.0' - version = '0.6.0' + defaultBranch = 'main' + nextflowVersion = '!>=24.04.2' + version = '0.7.0' doi = '10.5281/zenodo.7949058' } -// Load modules.config for DSL2 module specific options -includeConfig 'conf/modules.config' +// Nextflow plugins +plugins { + id 'nf-schema@2.3.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet +} -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } +validation { + defaultIgnoreParams = ["genomes"] + monochromeLogs = params.monochrome_logs + help { + enabled = true + command = "nextflow run sanger-tol/blobtoolkit -profile --input samplesheet.csv --outdir " + fullParameter = "help_full" + showHiddenParameter = "show_hidden" + beforeText = """ +-\033[2m----------------------------------------------------\033[0m- +\033[0;34m _____ \033[0;32m _______ \033[0;31m _\033[0m +\033[0;34m / ____| \033[0;32m|__ __| \033[0;31m| |\033[0m +\033[0;34m | (___ __ _ _ __ __ _ ___ _ __ \033[0m ___ \033[0;32m| |\033[0;33m ___ \033[0;31m| |\033[0m +\033[0;34m \\___ \\ / _` | '_ \\ / _` |/ _ \\ '__|\033[0m|___|\033[0;32m| |\033[0;33m/ _ \\\033[0;31m| |\033[0m +\033[0;34m ____) | (_| | | | | (_| | __/ | \033[0;32m| |\033[0;33m (_) \033[0;31m| |____\033[0m +\033[0;34m |_____/ \\__,_|_| |_|\\__, |\\___|_| \033[0;32m|_|\033[0;33m\\___/\033[0;31m|______|\033[0m +\033[0;34m __/ |\033[0m +\033[0;34m |___/\033[0m +\033[0;35m ${manifest.name} ${manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x +* Software dependencies + https://github.com/sanger-tol/blobtoolkit/blob/main/CITATIONS.md +""" + } + summary { + beforeText = validation.help.beforeText + afterText = validation.help.afterText } } @@ -313,6 +394,8 @@ def positive_log(value, base) { } def log_increase_cpus(start, step, value, base) { - return check_max(start + step * (1 + Math.ceil(positive_log(value, base))), 'cpus') + return start + step * (1 + Math.ceil(positive_log(value, base))) } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' diff --git a/nextflow_schema.json b/nextflow_schema.json index e722369d..636c65d4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,10 +1,10 @@ { - "$schema": "http://json-schema.org/draft-07/schema", - "$id": "https://raw.githubusercontent.com/sanger-tol/blobtoolkit/master/nextflow_schema.json", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/sanger-tol/blobtoolkit/main/nextflow_schema.json", "title": "sanger-tol/blobtoolkit pipeline parameters", "description": "Quality assessment of genome assemblies", "type": "object", - "definitions": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -45,14 +45,21 @@ "busco_lineages": { "type": "string", "pattern": "([^,]+_odb10,)*[^,]+_odb10", - "description": "Name of the ODB10 lineages to run BUSCO on. By default, the pipeline will automatically selecy the correct lineage based on the taxonomy.", + "description": "Name of the ODB10 lineages to run BUSCO on. By default, the pipeline will automatically select the correct lineage based on the taxonomy.", "fa_icon": "fas fa-bone" }, + "precomputed_busco": { + "type": "string", + "format": "path", + "description": "Directory containing precomputed BUSCO results (each named as `run_{lineage}_odbxx`), which can be supplied to skip running BUSCO in the pipeline.", + "fa_icon": "fas fa-folder-open" + }, "image_format": { "type": "string", "enum": ["png", "svg"], "description": "Select the format of the output images.", - "fa_icon": "fas fa-image" + "fa_icon": "fas fa-image", + "default": "png" }, "outdir": { "type": "string", @@ -93,6 +100,7 @@ "fasta": { "type": "string", "format": "file-path", + "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", @@ -109,40 +117,45 @@ "properties": { "busco": { "type": "string", - "format": "directory-path", + "format": "path", "description": "Local directory where clade-specific BUSCO lineage datasets are stored", "fa_icon": "fas fa-folder-open" }, "lineage_tax_ids": { "type": "string", "format": "file-path", - "description": "Local file that holds a mapping between BUSCO lineages and taxon IDs.", + "exists": true, + "description": "File that holds a mapping between BUSCO lineages and taxon IDs.", "help_text": "Initialised from https://busco-data.ezlab.org/v5/data/placement_files/mapping_taxids-busco_dataset_name.*.2019-12-16.txt.tar.gz", "fa_icon": "fas fa-file-code" }, "blastp": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "exists": true, + "pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscogenes database", "fa_icon": "fas fa-file-archive" }, "blastx": { "type": "string", - "format": "file-path", - "pattern": "^\\S+\\.dmnd$", + "format": "path", + "exists": true, + 
"pattern": "^\\S+\\.dmnd.*$", "description": "Path to the Diamond species-specific buscoregions database", "fa_icon": "fas fa-file-archive" }, "blastn": { "type": "string", - "format": "directory-path", + "format": "path", + "exists": true, "description": "Path to the nucleotide BLAST database", "fa_icon": "fas fa-file-archive" }, "taxdump": { "type": "string", - "format": "directory-path", + "format": "path", + "exists": true, "description": "Path to the new NCBI tax dump database", "fa_icon": "fas fa-folder-open" } @@ -211,41 +224,6 @@ } } }, - "max_job_request_options": { - "title": "Max job request options", - "type": "object", - "fa_icon": "fab fa-acquisitions-incorporated", - "description": "Set the top limit for requested resources for any single job.", - "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", - "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 16, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "128.GB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "240.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" - } - } - }, "generic_options": { "title": "Generic options", "type": "object", @@ -253,12 +231,6 @@ "description": "Less common options for the pipeline, typically set in a config file.", "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { - "help": { - "type": "boolean", - "description": "Display help text.", - "fa_icon": "fas fa-question-circle", - "hidden": true - }, "version": { "type": "boolean", "description": "Display version and exit.", @@ -334,51 +306,36 @@ "fa_icon": "fas fa-check-square", "hidden": true }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. 
Specifying this option will tell the pipeline to show all parameters."
-        },
-        "validationFailUnrecognisedParams": {
-          "type": "boolean",
-          "fa_icon": "far fa-check-circle",
-          "description": "Validation of parameters fails when an unrecognised parameter is found.",
-          "hidden": true,
-          "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
-        },
-        "validationLenientMode": {
-          "type": "boolean",
-          "fa_icon": "far fa-check-circle",
-          "description": "Validation of parameters in lenient more.",
-          "hidden": true,
-          "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+        "trace_report_suffix": {
+          "type": "string",
+          "fa_icon": "far calendar",
+          "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.",
+          "hidden": true
         }
       }
    }
  },
  "allOf": [
    {
-      "$ref": "#/definitions/input_output_options"
+      "$ref": "#/$defs/input_output_options"
    },
    {
-      "$ref": "#/definitions/reference_genome_options"
+      "$ref": "#/$defs/institutional_config_options"
    },
    {
-      "$ref": "#/definitions/databases"
+      "$ref": "#/$defs/reference_genome_options"
    },
    {
-      "$ref": "#/definitions/execution"
+      "$ref": "#/$defs/databases"
    },
    {
-      "$ref": "#/definitions/institutional_config_options"
+      "$ref": "#/$defs/execution"
    },
-    {
-      "$ref": "#/definitions/max_job_request_options"
-    },
    {
-      "$ref": "#/definitions/generic_options"
+      "$ref": "#/$defs/generic_options"
    }
  ]
}
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 0d62beb6..00000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black.
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
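These Black/isort settings are dropped together with the Python samplesheet-check route: as the `input_check.nf` changes later in this diff show, samplesheets are now validated and parsed with the `nf-schema` plugin declared in `nextflow.config`. A rough sketch of the core pattern, using `params.input` and the schema path as they appear in INPUT_CHECK:

```nextflow
include { samplesheetToList } from 'plugin/nf-schema'

workflow {
    // Validate params.input against the JSON schema, then emit one
    // channel element per samplesheet row.
    Channel
        .fromList( samplesheetToList( params.input, "assets/schema_input.json" ) )
        .set { read_files }

    read_files.view()
}
```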
-[tool.black] -line-length = 120 -target_version = ["py37", "py38", "py39", "py310"] - -[tool.isort] -profile = "black" -known_first_party = ["nf_core"] -multi_line_output = 3 diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json new file mode 100644 index 00000000..dc61b0c4 --- /dev/null +++ b/ro-crate-metadata.json @@ -0,0 +1,315 @@ +{ + "@context": [ + "https://w3id.org/ro/crate/1.1/context", + { + "GithubService": "https://w3id.org/ro/terms/test#GithubService", + "JenkinsService": "https://w3id.org/ro/terms/test#JenkinsService", + "PlanemoEngine": "https://w3id.org/ro/terms/test#PlanemoEngine", + "TestDefinition": "https://w3id.org/ro/terms/test#TestDefinition", + "TestInstance": "https://w3id.org/ro/terms/test#TestInstance", + "TestService": "https://w3id.org/ro/terms/test#TestService", + "TestSuite": "https://w3id.org/ro/terms/test#TestSuite", + "TravisService": "https://w3id.org/ro/terms/test#TravisService", + "definition": "https://w3id.org/ro/terms/test#definition", + "engineVersion": "https://w3id.org/ro/terms/test#engineVersion", + "instance": "https://w3id.org/ro/terms/test#instance", + "resource": "https://w3id.org/ro/terms/test#resource", + "runsOn": "https://w3id.org/ro/terms/test#runsOn" + } + ], + "@graph": [ + { + "@id": "./", + "@type": "Dataset", + "creativeWorkStatus": "Stable", + "datePublished": "2025-03-07T13:35:10+00:00", + "description": "# sanger-tol/blobtoolkit\n\n[![GitHub Actions CI Status](https://github.com/sanger-tol/blobtoolkit/actions/workflows/ci.yml/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/sanger-tol/blobtoolkit/actions/workflows/linting.yml/badge.svg)](https://github.com/sanger-tol/blobtoolkit/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/sanger-tol/blobtoolkit)\n\n## Introduction\n\n**sanger-tol/blobtoolkit** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run sanger-tol/blobtoolkit \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nsanger-tol/blobtoolkit was originally written by priyanka-surana.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "hasPart": [ + { + "@id": "main.nf" + }, + { + "@id": "assets/" + }, + { + "@id": "conf/" + }, + { + "@id": "docs/" + }, + { + "@id": "modules/" + }, + { + "@id": "modules/nf-core/" + }, + { + "@id": "workflows/" + }, + { + "@id": "subworkflows/" + }, + { + "@id": "nextflow.config" + }, + { + "@id": "README.md" + }, + { + "@id": "nextflow_schema.json" + }, + { + "@id": "CHANGELOG.md" + }, + { + "@id": "LICENSE" + }, + { + "@id": "CITATIONS.md" + }, + { + "@id": "modules.json" + }, + { + "@id": "docs/usage.md" + }, + { + "@id": "docs/output.md" + }, + { + "@id": ".nf-core.yml" + }, + { + "@id": ".pre-commit-config.yaml" + }, + { + "@id": ".prettierignore" + } + ], + "isBasedOn": "https://github.com/sanger-tol/blobtoolkit", + "license": "MIT", + "mainEntity": { + "@id": "main.nf" + }, + "mentions": [ + { + "@id": "#8275489f-2726-4d94-b502-adfbd72b7b9d" + } + ], + "name": "sanger-tol/blobtoolkit" + }, + { + "@id": "ro-crate-metadata.json", + "@type": "CreativeWork", + "about": { + "@id": "./" + }, + "conformsTo": [ + { + "@id": "https://w3id.org/ro/crate/1.1" + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate/1.0" + } + ] + }, + { + "@id": "main.nf", + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ + { + "@id": "#ps22@sanger.ac.uk" + }, + { + "@id": "https://orcid.org/0000-0002-7860-3560" + } + ], + "dateCreated": "", + "dateModified": "2025-03-07T13:35:10Z", + "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", + "keywords": [ + "nf-core", + "nextflow" + ], + "license": [ + "MIT" + ], + "name": [ + "sanger-tol/blobtoolkit" + ], + "programmingLanguage": { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" + }, + "sdPublisher": { + "@id": "https://nf-co.re/" + }, + "url": [ + 
"https://github.com/sanger-tol/blobtoolkit", + "https://nf-co.re/sanger-tol/blobtoolkit/0.7.0/" + ], + "version": [ + "0.7.0" + ] + }, + { + "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", + "@type": "ComputerLanguage", + "identifier": { + "@id": "https://www.nextflow.io/" + }, + "name": "Nextflow", + "url": { + "@id": "https://www.nextflow.io/" + }, + "version": "!>=24.04.2" + }, + { + "@id": "#8275489f-2726-4d94-b502-adfbd72b7b9d", + "@type": "TestSuite", + "instance": [ + { + "@id": "#c10b809d-8e67-421b-acff-2c4de88bcf1f" + } + ], + "mainEntity": { + "@id": "main.nf" + }, + "name": "Test suite for sanger-tol/blobtoolkit" + }, + { + "@id": "#c10b809d-8e67-421b-acff-2c4de88bcf1f", + "@type": "TestInstance", + "name": "GitHub Actions workflow for testing sanger-tol/blobtoolkit", + "resource": "repos/sanger-tol/blobtoolkit/actions/workflows/ci.yml", + "runsOn": { + "@id": "https://w3id.org/ro/terms/test#GithubService" + }, + "url": "https://api.github.com" + }, + { + "@id": "https://w3id.org/ro/terms/test#GithubService", + "@type": "TestService", + "name": "Github Actions", + "url": { + "@id": "https://github.com" + } + }, + { + "@id": "assets/", + "@type": "Dataset", + "description": "Additional files" + }, + { + "@id": "conf/", + "@type": "Dataset", + "description": "Configuration files" + }, + { + "@id": "docs/", + "@type": "Dataset", + "description": "Markdown files for documenting the pipeline" + }, + { + "@id": "modules/", + "@type": "Dataset", + "description": "Modules used by the pipeline" + }, + { + "@id": "modules/nf-core/", + "@type": "Dataset", + "description": "nf-core modules" + }, + { + "@id": "workflows/", + "@type": "Dataset", + "description": "Main pipeline workflows to be executed in main.nf" + }, + { + "@id": "subworkflows/", + "@type": "Dataset", + "description": "Smaller subworkflows" + }, + { + "@id": "nextflow.config", + "@type": "File", + "description": "Main Nextflow configuration file" + }, + { + "@id": "README.md", + "@type": "File", + "description": "Basic pipeline usage information" + }, + { + "@id": "nextflow_schema.json", + "@type": "File", + "description": "JSON schema for pipeline parameter specification" + }, + { + "@id": "CHANGELOG.md", + "@type": "File", + "description": "Information on changes made to the pipeline" + }, + { + "@id": "LICENSE", + "@type": "File", + "description": "The license - should be MIT" + }, + { + "@id": "CITATIONS.md", + "@type": "File", + "description": "Citations needed when using the pipeline" + }, + { + "@id": "modules.json", + "@type": "File", + "description": "Version information for modules from nf-core/modules" + }, + { + "@id": "docs/usage.md", + "@type": "File", + "description": "Usage documentation" + }, + { + "@id": "docs/output.md", + "@type": "File", + "description": "Output documentation" + }, + { + "@id": ".nf-core.yml", + "@type": "File", + "description": "nf-core configuration file, configuring template features and linting rules" + }, + { + "@id": ".pre-commit-config.yaml", + "@type": "File", + "description": "Configuration file for pre-commit hooks" + }, + { + "@id": ".prettierignore", + "@type": "File", + "description": "Ignore file for prettier" + }, + { + "@id": "https://nf-co.re/", + "@type": "Organization", + "name": "nf-core", + "url": "https://nf-co.re/" + }, + { + "@id": "#ps22@sanger.ac.uk", + "@type": "Person", + "email": "ps22@sanger.ac.uk", + "name": "Priyanka Surana" + }, + { + "@id": "https://orcid.org/0000-0002-7860-3560", + "@type": "Person", + "email": "mm49@sanger.ac.uk", + 
"name": "Matthieu Muffato" + } + ] +} \ No newline at end of file diff --git a/subworkflows/local/blobtools.nf b/subworkflows/local/blobtools.nf index 747bc9fa..eea7f432 100644 --- a/subworkflows/local/blobtools.nf +++ b/subworkflows/local/blobtools.nf @@ -2,13 +2,14 @@ // Create BlobTools dataset // -include { BLOBTOOLKIT_METADATA } from '../../modules/local/blobtoolkit/metadata' include { BLOBTOOLKIT_CREATEBLOBDIR } from '../../modules/local/blobtoolkit/createblobdir' include { BLOBTOOLKIT_UPDATEBLOBDIR } from '../../modules/local/blobtoolkit/updateblobdir' workflow BLOBTOOLS { take: config // channel: [ val(meta), path(config) ] + syn_tsv // channel: [ val(meta), [path(tsv)] ] + cat_tsv // channel: [ val(meta), [path(tsv)] ] windowstats // channel: [ val(meta), path(window_stats_tsvs) ] busco // channel: [ val(meta), path(full_table) ] blastp // channel: [ val(meta), path(txt) ] @@ -21,29 +22,21 @@ workflow BLOBTOOLS { ch_versions = Channel.empty() - // - // Create metadata summary file - // - BLOBTOOLKIT_METADATA ( config ) - ch_versions = ch_versions.mix ( BLOBTOOLKIT_METADATA.out.versions.first() ) - - // // Create Blobtools dataset files // - BLOBTOOLKIT_CREATEBLOBDIR ( windowstats, busco, blastp, BLOBTOOLKIT_METADATA.out.yaml, taxdump ) + BLOBTOOLKIT_CREATEBLOBDIR ( windowstats, busco, blastp, config, taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CREATEBLOBDIR.out.versions.first() ) // // Update Blobtools dataset files // - BLOBTOOLKIT_UPDATEBLOBDIR ( BLOBTOOLKIT_CREATEBLOBDIR.out.blobdir, blastx, blastn, taxdump ) + BLOBTOOLKIT_UPDATEBLOBDIR ( BLOBTOOLKIT_CREATEBLOBDIR.out.blobdir, syn_tsv, cat_tsv, blastx, blastn, taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_UPDATEBLOBDIR.out.versions.first() ) emit: - metadata = BLOBTOOLKIT_METADATA.out.yaml // channel: [ val(meta), path(yaml) ] blobdir = BLOBTOOLKIT_UPDATEBLOBDIR.out.blobdir // channel: [ val(meta), path(dir) ] versions = ch_versions // channel: [ versions.yml ] } diff --git a/subworkflows/local/busco_diamond_blastp.nf b/subworkflows/local/busco_diamond_blastp.nf index 4b07723e..5c0a5bd4 100644 --- a/subworkflows/local/busco_diamond_blastp.nf +++ b/subworkflows/local/busco_diamond_blastp.nf @@ -2,7 +2,7 @@ // Run BUSCO for a genome and runs diamond_blastp // -include { BUSCO } from '../../modules/nf-core/busco/main' +include { BUSCO_BUSCO } from '../../modules/nf-core/busco/busco/main' include { BLOBTOOLKIT_EXTRACTBUSCOS } from '../../modules/local/blobtoolkit/extractbuscos' include { DIAMOND_BLASTP } from '../../modules/nf-core/diamond/blastp/main' include { RESTRUCTUREBUSCODIR } from '../../modules/local/restructurebuscodir' @@ -15,14 +15,14 @@ workflow BUSCO_DIAMOND { busco_db // channel: path(busco_db) blastp // channel: path(blastp_db) taxon_id // channel: val(taxon_id) - + precomputed_busco // channel: [ val(meta}, path(busco_run_dir) ] optional precomputed busco outputs main: ch_versions = Channel.empty() // - // Prepare the BUSCO linages + // Prepare the BUSCO lineages // // 0. 
Initialise some variables
     basal_lineages = [ "eukaryota_odb10", "bacteria_odb10", "archaea_odb10" ]
@@ -40,55 +40,130 @@ workflow BUSCO_DIAMOND {
     | set { ch_fasta_with_lineage }

+    //
+    // Format pre-computed outputs
+    //
+    ch_precomputed_busco = precomputed_busco
+        .map { meta, dir -> [meta.lineage, [meta, dir]] }
+
+    ch_combined = ch_fasta_with_lineage
+        .map { meta, fasta -> [meta.lineage_name, [meta, fasta]] }
+        .join(ch_precomputed_busco, by: 0, remainder: true)
+        .map { lineage, fasta_data, busco_data ->
+            def (meta, fasta) = fasta_data
+            def (busco_meta, busco_dir) = busco_data ?: [null, null]
+            [meta + [busco_dir: busco_dir], fasta]
+        }
+
+    // Branch based on whether there's a pre-computed BUSCO output
+    ch_busco_to_run = ch_combined.branch {
+        precomputed: it[0].busco_dir != null
+        to_compute: true
+    }
+
+
+    // Format precomputed BUSCO outputs
+    ch_formatted_precomputed = ch_busco_to_run.precomputed
+        .map { meta, fasta ->
+            def busco_dir = file(meta.busco_dir)
+            def lineage = meta.lineage_name
+            [
+                meta,
+                [
+                    batch_summary: [],
+                    short_summaries_txt: file("${busco_dir}/short_summary.txt"),
+                    short_summaries_json: file("${busco_dir}/short_summary.json"),
+                    full_table: file("${busco_dir}/full_table.tsv"),
+                    missing_busco_list: file("${busco_dir}/missing_busco_list.tsv"),
+                    single_copy_proteins: file("${busco_dir}/single_copy_proteins.faa"),
+                    seq_dir: file("${busco_dir}/busco_sequences"),
+                    translated_dir: file("${busco_dir}/translated_proteins"),
+                    busco_dir: busco_dir
+                ]
+            ]
+        }
+
     //
     // Run BUSCO search
     //
-    BUSCO (
-        ch_fasta_with_lineage,
-        "genome",
-        ch_fasta_with_lineage.map { it[0].lineage_name },
+    BUSCO_BUSCO(
+        ch_busco_to_run.to_compute,
+        'genome',
+        ch_busco_to_run.to_compute.map { it[0].lineage_name },
         busco_db,
-        [],
+        []
     )
-    ch_versions = ch_versions.mix ( BUSCO.out.versions.first() )
-
+    ch_versions = ch_versions.mix ( BUSCO_BUSCO.out.versions.first() )
+
+    // Join new and pre-computed BUSCO outputs
+    ch_all_busco_outputs = BUSCO_BUSCO.out.batch_summary
+        .join(BUSCO_BUSCO.out.short_summaries_txt, by: 0, remainder: true )
+        .join(BUSCO_BUSCO.out.short_summaries_json, by: 0, remainder: true )
+        .join(BUSCO_BUSCO.out.full_table, by: 0, remainder: true )
+        .join(BUSCO_BUSCO.out.missing_busco_list, by: 0, remainder: true )
+        .join(BUSCO_BUSCO.out.single_copy_proteins, by: 0, remainder: true)
+        .join(BUSCO_BUSCO.out.seq_dir, by: 0)
+        .join(BUSCO_BUSCO.out.translated_dir, by: 0, remainder: true )
+        .join(BUSCO_BUSCO.out.busco_dir, by: 0)
+        .map { meta, batch_summary, short_summaries_txt, short_summaries_json, full_table, missing_busco_list, single_copy_proteins, seq_dir, translated_dir, busco_dir ->
+            [
+                meta,
+                [
+                    batch_summary: batch_summary,
+                    short_summaries_txt: short_summaries_txt,
+                    short_summaries_json: short_summaries_json,
+                    full_table: full_table,
+                    missing_busco_list: missing_busco_list,
+                    single_copy_proteins: single_copy_proteins,
+                    seq_dir: seq_dir,
+                    translated_dir: translated_dir,
+                    busco_dir: busco_dir
+                ]
+            ]
+        }
+        .mix(ch_formatted_precomputed)
     //
     // Tidy up the BUSCO output directories before publication
     //
     RESTRUCTUREBUSCODIR(
-        BUSCO.out.seq_dir
-        | map { meta, seq -> [meta, meta.lineage_name] }
-        | join ( BUSCO.out.batch_summary )
-        | join ( BUSCO.out.short_summaries_txt, remainder: true )
-        | join ( BUSCO.out.short_summaries_json, remainder: true )
-        | join ( BUSCO.out.busco_dir )
-        | map { meta, lineage, batch_summary, short_summaries_txt, short_summaries_json, busco_dir -> [meta, lineage, batch_summary, short_summaries_txt ?: [],
short_summaries_json ?: [], busco_dir] } + ch_all_busco_outputs + .map { meta, outputs -> + [ + meta, + meta.lineage_name, + outputs.batch_summary ?: [], + outputs.short_summaries_txt ?: [], + outputs.short_summaries_json ?: [], + outputs.full_table ?: [], + outputs.missing_busco_list ?: [], + outputs.seq_dir ? "${outputs.seq_dir}/single_copy_busco_sequences" : [], + outputs.seq_dir ? "${outputs.seq_dir}/multi_copy_busco_sequences" : [], + outputs.seq_dir ? "${outputs.seq_dir}/fragmented_busco_sequences" : [], + outputs.busco_dir ? "${outputs.busco_dir}/hmmer_output" : [] + ] + } ) - ch_versions = ch_versions.mix ( RESTRUCTUREBUSCODIR.out.versions.first() ) - // // Select input for BLOBTOOLKIT_EXTRACTBUSCOS // - BUSCO.out.seq_dir - | filter { meta, seq -> basal_lineages.contains(meta.lineage_name) } - | map { meta, seq -> seq } - | collect - | set { ch_basal_buscos } - + ch_all_busco_outputs + .filter { meta, outputs -> basal_lineages.contains(meta.lineage_name) } + .map { meta, outputs -> [meta, outputs.seq_dir] } + .collect { it[1] } + .set { ch_basal_buscos } // Extract BUSCO genes from the basal lineages BLOBTOOLKIT_EXTRACTBUSCOS ( fasta, ch_basal_buscos ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_EXTRACTBUSCOS.out.versions.first() ) - // // Align BUSCO genes against the BLASTp database // BLOBTOOLKIT_EXTRACTBUSCOS.out.genes - | filter { it[1].size() > 140 } - | set { ch_busco_genes } + .filter { it[1].size() > 140 } + .set { ch_busco_genes } // Hardcoded to match the format expected by blobtools def outext = 'txt' @@ -96,29 +171,37 @@ workflow BUSCO_DIAMOND { DIAMOND_BLASTP ( ch_busco_genes, blastp, outext, cols, taxon_id ) ch_versions = ch_versions.mix ( DIAMOND_BLASTP.out.versions.first() ) - // Order BUSCO results according to the lineage index - BUSCO.out.full_table - // 1. Restore the original meta map, and pull the index as an extra tuple element - | map { meta, table -> [meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" }, [table, meta.lineage_index]] } - // 2. Turn to a single-element channel that has the (one and only) meta map, and all the pairs (table, lineage index) concatenated as a list - | groupTuple() - // 3. Sort the pairs and discard the index - | map { meta, table_positions -> [ meta, table_positions.sort { a, b -> a[1] <=> b[1] } . collect { table, lineage_index -> table } ] } - | set { ch_indexed_buscos } - + ch_all_busco_outputs + // 0. Filter out the BUSCO results that found no gene (seen for archaea/bacteria) + | filter { meta, outputs -> outputs.full_table } + // 1. Extract the necessary information and create a consistent structure + | map { meta, outputs -> + def cleaned_meta = meta.findAll { it.key != "lineage_name" && it.key != "lineage_index" && it.key != "busco_dir" } + def full_table = outputs.full_table + def lineage_index = meta.lineage_index + [cleaned_meta, [full_table, lineage_index]] + } + // 2. Group by the cleaned meta information + | groupTuple(by: 0) + // 3. 
Sort the tables by lineage index and collect only the tables + | map { meta, table_positions -> + [ + meta, + table_positions.sort { a, b -> a[1] <=> b[1] }.collect { it[0] } + ] + } + | set { ch_indexed_buscos } // Select BUSCO results for taxonomically closest database ch_indexed_buscos - | map { meta, tables -> [meta, tables[0]] } - | set { ch_first_table } - + .map { meta, tables -> [meta, tables[0]] } + .set { ch_first_table } // BUSCO results for MULTIQC - BUSCO.out.short_summaries_txt - | ifEmpty ( [ [], [] ] ) - | set { multiqc } - + BUSCO_BUSCO.out.short_summaries_txt + .map { meta, outputs -> outputs } + .set { multiqc } emit: first_table = ch_first_table // channel: [ val(meta), path(full_table) ] diff --git a/subworkflows/local/finalise_blobdir.nf b/subworkflows/local/finalise_blobdir.nf index 352cb01b..04dfe8d2 100644 --- a/subworkflows/local/finalise_blobdir.nf +++ b/subworkflows/local/finalise_blobdir.nf @@ -8,7 +8,6 @@ include { COMPRESSBLOBDIR } from '../../modules/local/compressblobdir' workflow FINALISE_BLOBDIR { take: blobdir // channel: [ val(meta), path(blobdir) ] - reads // channel: [ [meta, reads] ] software // channel: [ val(meta), path(software_yml) ] summary // channel: [ val(meta), path(summary_json) ] @@ -17,17 +16,9 @@ workflow FINALISE_BLOBDIR { ch_versions = Channel.empty() // - // MODULE: Update meta json file + // MODULE: Update the software listed in the meta json file // - BLOBTOOLKIT_UPDATEMETA ( - blobdir, - reads, - software, - params.blastp, - params.blastx, - params.blastn, - params.taxdump, - ) + BLOBTOOLKIT_UPDATEMETA ( blobdir, software ) // // MODULE: Compress all the json files diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 7a8fd112..32507e01 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -2,35 +2,76 @@ // Check input samplesheet and get aligned read channels // +include { samplesheetToList } from 'plugin/nf-schema' + + +include { UNTAR } from '../../modules/nf-core/untar/main' include { CAT_CAT } from '../../modules/nf-core/cat/cat/main' include { SAMTOOLS_FLAGSTAT } from '../../modules/nf-core/samtools/flagstat/main' -include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' -include { FETCHNGSSAMPLESHEET_CHECK } from '../../modules/local/fetchngssamplesheet_check' include { GENERATE_CONFIG } from '../../modules/local/generate_config' +include { JSONIFY_TAXDUMP } from '../../modules/local/jsonify_taxdump' workflow INPUT_CHECK { take: - samplesheet // file: /path/to/samplesheet.csv - fasta // channel: [ meta, path(fasta) ] - taxon // channel: val(taxon) - busco_lin // channel: val([busco_lin]) - lineage_tax_ids // channel: /path/to/lineage_tax_ids - blastn // channel: [ val(meta), path(blastn_db) ] + samplesheet // channel: /path/to/samplesheet + fasta // channel: [ meta, path(fasta) ] + taxon // channel: val(taxon) + busco_lin // channel: val([busco_lin]) + lineage_tax_ids // channel: /path/to/lineage_tax_ids + databases main: ch_versions = Channel.empty() + // + // SUBWORKFLOW: Decompress databases if needed + // + + // Check which need to be decompressed + ch_dbs_for_untar = databases + .branch { db_meta, db_path -> + untar: db_path.name.endsWith( ".tar.gz" ) + skip: true + } + + // Untar the databases + UNTAR ( ch_dbs_for_untar.untar ) + ch_versions = ch_versions.mix( UNTAR.out.versions.first() ) + + // Join and format dbs + ch_databases = ch_dbs_for_untar.skip + .mix( UNTAR.out.untar ) + .map { meta, db -> [ meta + [id: db.baseName], db] 
} + .map { db_meta, db_path -> + if (db_meta.type in ["blastp", "blastx"] && db_path.isDirectory()) { + [db_meta, file(db_path.toString() + "/${db_path.name}", checkIfExists: true)] + } else { + [db_meta, db_path] + } + } + .branch { db_meta, db_path -> + blastn: db_meta.type == "blastn" + blastp: db_meta.type == "blastp" + blastx: db_meta.type == "blastx" + precomputed_busco: db_meta.type == "precomputed_busco" + busco: db_meta.type == "busco" + taxdump: db_meta.type == "taxdump" + } + + // + // SUBWORKFLOW: Process samplesheet + // if ( params.fetchngs_samplesheet ) { - FETCHNGSSAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',', quote:'"' ) + Channel + .fromList(samplesheetToList(samplesheet, "assets/schema_fetchngs_input.json")) + .map {it[0]} .branch { row -> paired: row.fastq_2 + // Reformat for CAT_CAT [[id: row.run_accession, row:row], [row.fastq_1, row.fastq_2]] not_paired: true } .set { reads_pairedness } - ch_versions = ch_versions.mix ( FETCHNGSSAMPLESHEET_CHECK.out.versions.first() ) CAT_CAT ( reads_pairedness.paired ) ch_versions = ch_versions.mix ( CAT_CAT.out.versions.first() ) @@ -42,12 +83,10 @@ workflow INPUT_CHECK { | set { read_files } } else { - SAMPLESHEET_CHECK ( samplesheet ) - .csv - .splitCsv ( header:true, sep:',' ) - .map { create_data_channels(it) } + Channel + .fromList(samplesheetToList(samplesheet, "assets/schema_input.json")) + .map { check_data_channel(it) } .set { read_files } - ch_versions = ch_versions.mix ( SAMPLESHEET_CHECK.out.versions.first() ) } @@ -61,12 +100,21 @@ workflow INPUT_CHECK { | set { reads } + // Get the source paths of all the databases, except Busco which is not recorded in the blobDir meta.json + databases + | filter { meta, file -> meta.type != "busco" && meta.type != "precomputed_busco" } + | map {meta, file -> [meta, file.toUriString()]} + | set { db_paths } + + GENERATE_CONFIG ( fasta, taxon, busco_lin, + ch_databases.blastn, lineage_tax_ids, - blastn, + reads.collect(flat: false).ifEmpty([]), + db_paths.collect(flat: false), ) ch_versions = ch_versions.mix(GENERATE_CONFIG.out.versions.first()) @@ -102,37 +150,83 @@ workflow INPUT_CHECK { | collect | set { ch_busco_lineages } + // Format pre-computed BUSCOs (if provided) + // Parse the BUSCO output directories + ch_parsed_busco = ch_databases.precomputed_busco + .flatMap { meta, dir -> + def subdirs = file(dir).listFiles().findAll { it.isDirectory() } + subdirs.collect { subdir -> + def lineage = subdir.name.startsWith('run_') ? subdir.name.substring(4) : subdir.name + [[type: 'precomputed_busco', id: subdir.name, lineage: lineage], subdir] + } + } + + // Remove any invalid lineages from precomputed_busco + ch_busco_lineages_list = ch_busco_lineages.flatten() + ch_parsed_busco_filtered = ch_parsed_busco + .filter { meta, path -> + ch_busco_lineages.contains(meta.lineage) + } + ch_parsed_busco_filtered = ch_parsed_busco_filtered.ifEmpty { Channel.value([]) } + + // + // Get the BUSCO path if set + // + ch_databases.busco + | map { _, db_path -> db_path } + | ifEmpty( [] ) + | set { ch_busco_db } + + + // + // Convert the taxdump to a JSON file if there isn't one yet + // + ch_databases.taxdump + | filter { meta, db_path -> ! 
db_path.isFile() } + | map { meta, db_path -> [meta, db_path, db_path.listFiles().find { it.getName().endsWith('.json') }] } + | branch { meta, db_path, json_path -> + json: json_path + return [meta, json_path] + dir: true + return [meta, db_path] + } + | set { taxdump_dirs } + + JSONIFY_TAXDUMP( taxdump_dirs.dir ) + ch_versions = ch_versions.mix(JSONIFY_TAXDUMP.out.versions.first()) + + ch_databases.taxdump + | filter { meta, db_path -> db_path.isFile() } + | mix ( taxdump_dirs.json ) + | mix( JSONIFY_TAXDUMP.out.json ) + | map { _, db_path -> db_path } + | set { ch_taxdump } + emit: reads // channel: [ val(meta), path(datafile) ] config = GENERATE_CONFIG.out.yaml // channel: [ val(meta), path(yaml) ] + synonyms_tsv = GENERATE_CONFIG.out.synonyms_tsv // channel: [ val(meta), path(tsv) ] + categories_tsv = GENERATE_CONFIG.out.categories_tsv // channel: [ val(meta), path(tsv) ] taxon_id = ch_taxon_id // channel: val(taxon_id) busco_lineages = ch_busco_lineages // channel: val([busco_lin]) + blastn = ch_databases.blastn.first() // channel: [ val(meta), path(blastn_db) ] + blastp = ch_databases.blastp.first() // channel: [ val(meta), path(blastp_db) ] + blastx = ch_databases.blastx.first() // channel: [ val(meta), path(blastx_db) ] + precomputed_busco = ch_parsed_busco // channel: [ val(meta), path(busco_run_dir) ] + busco_db = ch_busco_db.first() // channel: [ path(busco_db) ] + taxdump = ch_taxdump.first() // channel: [ path(taxdump) ] versions = ch_versions // channel: [ versions.yml ] } // Function to get list of [ meta, datafile ] -def create_data_channels(LinkedHashMap row) { - // create meta map - def meta = [:] - meta.id = row.sample - meta.datatype = row.datatype - +def check_data_channel(meta, datafile) { - // add path(s) of the read file(s) to the meta map - def data_meta = [] - - if ( !params.align && !row.datafile.endsWith(".bam") && !row.datafile.endsWith(".cram") ) { - exit 1, "ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastA/FastQ format but --align is not set!\n${row.datafile}" + if ( !params.align && !datafile.name.endsWith(".bam") && !datafile.name.endsWith(".cram") ) { + error("ERROR: Please check input samplesheet and pipeline parameters -> Data file is in FastA/FastQ format but --align is not set!\n${datafile}") } - if ( !file(row.datafile).exists() ) { - exit 1, "ERROR: Please check input samplesheet -> Data file does not exist!\n${row.datafile}" - } else { - data_meta = [ meta, file(row.datafile) ] - } - - return data_meta + return [meta, datafile] } // Function to get list of [ meta, datafile ] @@ -140,6 +234,7 @@ def create_data_channels_from_fetchngs(LinkedHashMap row) { // create meta map def meta = [:] meta.id = row.run_accession + meta.layout = row.library_layout // Same as https://github.com/blobtoolkit/blobtoolkit/blob/4.3.3/src/blobtoolkit-pipeline/src/lib/functions.py#L30-L39 // with the addition of "hic" @@ -157,17 +252,7 @@ def create_data_channels_from_fetchngs(LinkedHashMap row) { meta.datatype = "illumina" } - - // add path(s) of the read file(s) to the meta map - def data_meta = [] - - if ( !file(row.fastq_1).exists() ) { - exit 1, "ERROR: Please check input samplesheet -> Data file does not exist!\n${row.fastq_1}" - } else { - data_meta = [ meta, file(row.fastq_1) ] - } - - return data_meta + return [ meta, file(row.fastq_1) ] } // Function to get the read counts from a samtools flagstat file diff --git a/subworkflows/local/minimap_alignment.nf b/subworkflows/local/minimap_alignment.nf index 0c25f4c7..c05f5179 
100644 --- a/subworkflows/local/minimap_alignment.nf +++ b/subworkflows/local/minimap_alignment.nf @@ -2,7 +2,6 @@ // Optional alignment subworkflow using Minimap2 // -include { SAMTOOLS_FASTA } from '../../modules/nf-core/samtools/fasta/main' include { MINIMAP2_ALIGN as MINIMAP2_HIC } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_ILMN } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_CCS } from '../../modules/nf-core/minimap2/align/main' @@ -20,24 +19,8 @@ workflow MINIMAP2_ALIGNMENT { ch_versions = Channel.empty() - // Convert BAM/CRAM reads to FASTA - input - | branch { - meta, reads -> - fasta: reads.toString().endsWith(".fasta") || reads.toString().endsWith(".fasta.gz") || reads.toString().endsWith(".fa") || reads.toString().endsWith(".fa.gz") - fastq: reads.toString().endsWith(".fastq") || reads.toString().endsWith(".fastq.gz") || reads.toString().endsWith(".fq") || reads.toString().endsWith(".fq.gz") - bamcram: true - } - | set { ch_reads_by_type } - - SAMTOOLS_FASTA ( ch_reads_by_type.bamcram, true ) - ch_versions = ch_versions.mix(SAMTOOLS_FASTA.out.versions.first()) - - // Branch input by sequencing type - SAMTOOLS_FASTA.out.interleaved - | mix ( ch_reads_by_type.fasta ) - | mix ( ch_reads_by_type.fastq ) + input | branch { meta, reads -> hic: meta.datatype == "hic" @@ -50,19 +33,19 @@ workflow MINIMAP2_ALIGNMENT { // Align with Minimap2 - MINIMAP2_HIC ( ch_input.hic, fasta, true, false, false ) + MINIMAP2_HIC ( ch_input.hic, fasta, true, false, false, false ) ch_versions = ch_versions.mix(MINIMAP2_HIC.out.versions.first()) - MINIMAP2_ILMN ( ch_input.illumina, fasta, true, false, false ) + MINIMAP2_ILMN ( ch_input.illumina, fasta, true, false, false, false ) ch_versions = ch_versions.mix(MINIMAP2_ILMN.out.versions.first()) - MINIMAP2_CCS ( ch_input.pacbio, fasta, true, false, false ) + MINIMAP2_CCS ( ch_input.pacbio, fasta, true, false, false, false ) ch_versions = ch_versions.mix(MINIMAP2_CCS.out.versions.first()) - MINIMAP2_CLR ( ch_input.clr, fasta, true, false, false ) + MINIMAP2_CLR ( ch_input.clr, fasta, true, false, false, false ) ch_versions = ch_versions.mix(MINIMAP2_CLR.out.versions.first()) - MINIMAP2_ONT ( ch_input.ont, fasta, true, false, false ) + MINIMAP2_ONT ( ch_input.ont, fasta, true, false, false, false ) ch_versions = ch_versions.mix(MINIMAP2_ONT.out.versions.first()) diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf index a1f03980..5430d09b 100644 --- a/subworkflows/local/prepare_genome.nf +++ b/subworkflows/local/prepare_genome.nf @@ -9,27 +9,33 @@ include { WINDOWMASKER_USTAT } from '../../modules/nf-core/windowmasker/ustat workflow PREPARE_GENOME { take: - fasta // channel: [ meta, path(genome) ] + genome // channel: [ meta, path(fasta) ] main: ch_versions = Channel.empty() + // + // LOGIC: Identify the compressed files + // + ch_genomes_for_gunzip = genome + .branch { meta, fasta -> + gunzip: fasta.name.endsWith( ".gz" ) + skip: true + } // - // MODULE: Decompress FASTA file if needed + // MODULE: Decompress compressed FASTA files // - if ( params.fasta.endsWith('.gz') ) { - ch_unzipped = GUNZIP ( fasta ).gunzip - ch_versions = ch_versions.mix ( GUNZIP.out.versions ) - } else { - ch_unzipped = fasta - } + GUNZIP ( ch_genomes_for_gunzip.gunzip ) + ch_versions = ch_versions.mix ( GUNZIP.out.versions.first() ) + // // LOGIC: Extract the genome size for decision making downstream // - ch_unzipped + ch_genomes_for_gunzip.skip + | mix( 
GUNZIP.out.gunzip ) | map { meta, fa -> [ meta + [genome_size: fa.size()], fa] } | set { ch_genome } @@ -38,10 +44,10 @@ workflow PREPARE_GENOME { // if ( params.mask ) { WINDOWMASKER_MKCOUNTS ( ch_genome ) - ch_versions = ch_versions.mix ( WINDOWMASKER_MKCOUNTS.out.versions ) + ch_versions = ch_versions.mix ( WINDOWMASKER_MKCOUNTS.out.versions.first() ) WINDOWMASKER_USTAT ( WINDOWMASKER_MKCOUNTS.out.counts, ch_genome ) - ch_versions = ch_versions.mix ( WINDOWMASKER_USTAT.out.versions ) + ch_versions = ch_versions.mix ( WINDOWMASKER_USTAT.out.versions.first() ) ch_fasta = WINDOWMASKER_USTAT.out.intervals } else { diff --git a/subworkflows/local/run_blastx.nf b/subworkflows/local/run_blastx.nf index 715e5ae2..cfcb0219 100644 --- a/subworkflows/local/run_blastx.nf +++ b/subworkflows/local/run_blastx.nf @@ -19,7 +19,7 @@ workflow RUN_BLASTX { // - // Create metadata summary file + // Split the sequences // BLOBTOOLKIT_CHUNK ( fasta, table ) ch_versions = ch_versions.mix ( BLOBTOOLKIT_CHUNK.out.versions.first() ) diff --git a/subworkflows/local/utils_nfcore_blobtoolkit_pipeline/main.nf b/subworkflows/local/utils_nfcore_blobtoolkit_pipeline/main.nf new file mode 100644 index 00000000..23005275 --- /dev/null +++ b/subworkflows/local/utils_nfcore_blobtoolkit_pipeline/main.nf @@ -0,0 +1,204 @@ +// +// Subworkflow with functionality specific to the sanger-tol/blobtoolkit pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' +include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' +include { imNotification } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline' +include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW TO INITIALISE PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_INITIALISATION { + + take: + version // boolean: Display version and exit + validate_params // boolean: Boolean whether to validate parameters against the schema at runtime + monochrome_logs // boolean: Do not use coloured log outputs + nextflow_cli_args // array: List of positional nextflow CLI args + outdir // string: The output directory where the results will be saved + + main: + + ch_versions = Channel.empty() + + // + // Print version and exit if required and dump pipeline parameters to JSON file + // + UTILS_NEXTFLOW_PIPELINE ( + version, + true, + outdir, + workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1 + ) + + // + // Validate parameters and generate parameter summary to stdout + // + UTILS_NFSCHEMA_PLUGIN ( + workflow, + validate_params, + null + ) + + // + // Check config provided to the pipeline + // + UTILS_NFCORE_PIPELINE ( + nextflow_cli_args + ) + + if (params.fetchngs_samplesheet && !params.align) { + error('--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!') + } + + if (file(params.taxdump).isFile() && !params.taxdump.endsWith('.json') && 
!params.taxdump.endsWith('.tar.gz')) { + error('--taxdump can take either a JSON file, a tar.gz archive, or a directory') + } + + ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ]) + + Channel.empty() + .concat( Channel.fromPath(params.blastn).map { tuple(["type": "blastn"], it) } ) + .concat( Channel.fromPath(params.blastx).map { tuple(["type": "blastx"], it) } ) + .concat( Channel.fromPath(params.blastp).map { tuple(["type": "blastp"], it) } ) + .concat( params.precomputed_busco ? Channel.fromPath(params.precomputed_busco).map { tuple([ "type": "precomputed_busco"], it ) } : Channel.empty() ) + .concat( params.busco ? Channel.fromPath(params.busco).map { tuple([ "type": "busco"], it ) } : Channel.empty() ) + .concat( Channel.fromPath(params.taxdump).map { tuple(["type": "taxdump"], it) } ) + .set { ch_databases } + + emit: + versions = ch_versions + fasta = ch_fasta + databases = ch_databases +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW FOR PIPELINE COMPLETION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow PIPELINE_COMPLETION { + + take: + email // string: email address + email_on_fail // string: email address sent on pipeline failure + plaintext_email // boolean: Send plain-text email instead of HTML + outdir // path: Path to output directory where results will be published + monochrome_logs // boolean: Disable ANSI colour codes in log output + hook_url // string: hook URL for notifications + multiqc_report // string: Path to MultiQC report + + main: + summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json") + def multiqc_reports = multiqc_report.toList() + + // + // Completion email and summary + // + workflow.onComplete { + if (email || email_on_fail) { + completionEmail( + summary_params, + email, + email_on_fail, + plaintext_email, + outdir, + monochrome_logs, + multiqc_reports.getVal(), + ) + } + + completionSummary(monochrome_logs) + if (hook_url) { + imNotification(summary_params, hook_url) + } + } + + workflow.onError { + log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting" + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate methods description for MultiQC +// +def toolCitationText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text +} + +def toolBibliographyText() { + // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "",
+    // Uncomment function in methodsDescriptionText to render in MultiQC report
+    def reference_text = [
+            "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>"
+        ].join(' ').trim()
+
+    return reference_text
+}
+
+def methodsDescriptionText(mqc_methods_yaml) {
+    // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file
+    def meta = [:]
+    meta.workflow = workflow.toMap()
+    meta["manifest_map"] = workflow.manifest.toMap()
+
+    // Pipeline DOI
+    if (meta.manifest_map.doi) {
+        // Using a loop to handle multiple DOIs
+        // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers
+        // Removing ` ` since the manifest.doi is a string and not a proper list
+        def temp_doi_ref = ""
+        def manifest_doi = meta.manifest_map.doi.tokenize(",")
+        manifest_doi.each { doi_ref ->
+            temp_doi_ref += "(doi: ${doi_ref.replace("https://doi.org/", "").replace(" ", "")}), "
+        }
+        meta["doi_text"] = temp_doi_ref.substring(0, temp_doi_ref.length() - 2)
+    } else meta["doi_text"] = ""
+    meta["nodoi_text"] = meta.manifest_map.doi ? "" : "<li>If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.</li>
  • " + + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + // meta["tool_bibliography"] = toolBibliographyText() + + + def methods_text = mqc_methods_yaml.text + + def engine = new groovy.text.SimpleTemplateEngine() + def description_html = engine.createTemplate(methods_text).make(meta) + + return description_html.toString() +} + diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf new file mode 100644 index 00000000..eb732470 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -0,0 +1,126 @@ +// +// Subworkflow with functionality that may be useful for any Nextflow pipeline +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NEXTFLOW_PIPELINE { + take: + print_version // boolean: print version + dump_parameters // boolean: dump parameters + outdir // path: base directory used to publish pipeline results + check_conda_channels // boolean: check conda channels + + main: + + // + // Print workflow version and exit on --version + // + if (print_version) { + log.info("${workflow.manifest.name} ${getWorkflowVersion()}") + System.exit(0) + } + + // + // Dump pipeline parameters to a JSON file + // + if (dump_parameters && outdir) { + dumpParametersToJSON(outdir) + } + + // + // When running with Conda, warn if channels have not been set-up appropriately + // + if (check_conda_channels) { + checkCondaChannels() + } + + emit: + dummy_emit = true +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Generate version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 
'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Dump pipeline parameters to a JSON file +// +def dumpParametersToJSON(outdir) { + def timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss') + def filename = "params_${timestamp}.json" + def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") + def jsonStr = groovy.json.JsonOutput.toJson(params) + temp_pf.text = groovy.json.JsonOutput.prettyPrint(jsonStr) + + nextflow.extension.FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/blobtoolkit/params_${timestamp}.json") + temp_pf.delete() +} + +// +// When running with -profile conda, warn if channels have not been set-up appropriately +// +def checkCondaChannels() { + def parser = new org.yaml.snakeyaml.Yaml() + def channels = [] + try { + def config = parser.load("conda config --show channels".execute().text) + channels = config.channels + } + catch (NullPointerException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + catch (IOException e) { + log.debug(e) + log.warn("Could not verify conda channel configuration.") + return null + } + + // Check that all channels are present + // This channel list is ordered by required channel priority. + def required_channels_in_order = ['conda-forge', 'bioconda'] + def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + + // Check that they are in the right order + def channel_priority_violation = required_channels_in_order != channels.findAll { ch -> ch in required_channels_in_order } + + if (channels_missing | channel_priority_violation) { + log.warn """\ + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + There is a problem with your Conda configuration! + You will need to set-up the conda-forge and bioconda channels correctly. + Please refer to https://bioconda.github.io/ + The observed channel order is + ${channels} + but the following channel order is required: + ${required_channels_in_order} + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + """.stripIndent(true) + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml new file mode 100644 index 00000000..e5c3a0a8 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml @@ -0,0 +1,38 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NEXTFLOW_PIPELINE" +description: Subworkflow with functionality that may be useful for any Nextflow pipeline +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - print_version: + type: boolean + description: | + Print the version of the pipeline and exit + - dump_parameters: + type: boolean + description: | + Dump the parameters of the pipeline to a JSON file + - output_directory: + type: directory + description: Path to output dir to write JSON file to. + pattern: "results/" + - check_conda_channel: + type: boolean + description: | + Check if the conda channel priority is correct. 
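+# Illustrative usage (not part of this meta.yml): a minimal sketch of wiring the
+# four inputs documented above into a call. This mirrors the PIPELINE_INITIALISATION
+# invocation shown earlier in this diff; the names `version` and `outdir` come from
+# that code.
+#
+#   UTILS_NEXTFLOW_PIPELINE (
+#       version,   // print_version: log the version string and exit
+#       true,      // dump_parameters: write params_<timestamp>.json under outdir
+#       outdir,    // output_directory that receives the dumped JSON
+#       workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1   // check_conda_channel only for conda/mamba profiles
+#   )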
+output: + - dummy_emit: + type: boolean + description: | + Dummy emit to make nf-core subworkflows lint happy +authors: + - "@adamrtalbot" + - "@drpatelh" +maintainers: + - "@adamrtalbot" + - "@drpatelh" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..68718e4f --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test @@ -0,0 +1,54 @@ + +nextflow_function { + + name "Test Functions" + script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Test Function getWorkflowVersion") { + + function "getWorkflowVersion" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function dumpParametersToJSON") { + + function "dumpParametersToJSON" + + when { + function { + """ + // define inputs of the function here. Example: + input[0] = "$outputDir" + """.stripIndent() + } + } + + then { + assertAll( + { assert function.success } + ) + } + } + + test("Test Function checkCondaChannels") { + + function "checkCondaChannels" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..e3f0baf4 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,20 @@ +{ + "Test Function getWorkflowVersion": { + "content": [ + "v9.9.9" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:05.308243" + }, + "Test Function checkCondaChannels": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:12.425833" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..02dbf094 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,113 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NEXTFLOW_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config" + workflow "UTILS_NEXTFLOW_PIPELINE" + tag 'subworkflows' + tag 'utils_nextflow_pipeline' + tag 'subworkflows/utils_nextflow_pipeline' + + test("Should run no inputs") { + + when { + workflow { + """ + print_version = false + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should print version") { + + when { + workflow { + """ + print_version = true + dump_parameters = false + outdir = null + check_conda_channels = false + + input[0] = print_version + input[1] = dump_parameters + input[2] = outdir + input[3] = check_conda_channels + """ + } + } + + then { + expect { + with(workflow) { + assert success + 
assert "nextflow_workflow v9.9.9" in stdout + } + } + } + } + + test("Should dump params") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = 'results' + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should not create params JSON if no output directory") { + + when { + workflow { + """ + print_version = false + dump_parameters = true + outdir = null + check_conda_channels = false + + input[0] = false + input[1] = true + input[2] = outdir + input[3] = false + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config new file mode 100644 index 00000000..a09572e5 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml new file mode 100644 index 00000000..f8476112 --- /dev/null +++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nextflow_pipeline: + - subworkflows/nf-core/utils_nextflow_pipeline/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf new file mode 100644 index 00000000..bfd25876 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -0,0 +1,419 @@ +// +// Subworkflow with utility functions specific to the nf-core pipeline template +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + SUBWORKFLOW DEFINITION +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow UTILS_NFCORE_PIPELINE { + take: + nextflow_cli_args + + main: + valid_config = checkConfigProvided() + checkProfileProvided(nextflow_cli_args) + + emit: + valid_config +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// Warn if a -profile or Nextflow config has not been provided to run the pipeline +// +def checkConfigProvided() { + def valid_config = true as Boolean + if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { + log.warn( + "[${workflow.manifest.name}] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" + " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. 
`-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " + ) + valid_config = false + } + return valid_config +} + +// +// Exit pipeline if --profile contains spaces +// +def checkProfileProvided(nextflow_cli_args) { + if (workflow.profile.endsWith(',')) { + error( + "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } + if (nextflow_cli_args[0]) { + log.warn( + "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" + ) + } +} + +// +// Generate workflow version string +// +def getWorkflowVersion() { + def version_string = "" as String + if (workflow.manifest.version) { + def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' + version_string += "${prefix_v}${workflow.manifest.version}" + } + + if (workflow.commitId) { + def git_shortsha = workflow.commitId.substring(0, 7) + version_string += "-g${git_shortsha}" + } + + return version_string +} + +// +// Get software versions for pipeline +// +def processVersionsFromYAML(yaml_file) { + def yaml = new org.yaml.snakeyaml.Yaml() + def versions = yaml.load(yaml_file).collectEntries { k, v -> [k.tokenize(':')[-1], v] } + return yaml.dumpAsMap(versions).trim() +} + +// +// Get workflow version for pipeline +// +def workflowVersionToYAML() { + return """ + Workflow: + ${workflow.manifest.name}: ${getWorkflowVersion()} + Nextflow: ${workflow.nextflow.version} + """.stripIndent().trim() +} + +// +// Get channel of software versions used in pipeline in YAML format +// +def softwareVersionsToYAML(ch_versions) { + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) +} + +// +// Get workflow summary for MultiQC +// +def paramsSummaryMultiqc(summary_params) { + def summary_section = '' + summary_params + .keySet() + .each { group -> + def group_params = summary_params.get(group) + // This gets the parameters of that particular group + if (group_params) { + summary_section += "

    <p style=\"font-size:110%\"><b>${group}</b></p>\n"
+                summary_section += "    <dl class=\"dl-horizontal\">\n"
+                group_params
+                    .keySet()
+                    .sort()
+                    .each { param ->
+                        summary_section += "        <dt>${param}</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</a>'}</samp></dd>\n"
+                    }
+                summary_section += "    </dl>
    \n" + } + } + + def yaml_file_text = "id: '${workflow.manifest.name.replace('/', '-')}-summary'\n" as String + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + + return yaml_file_text +} + +// +// ANSII colours used for terminal logging +// +def logColours(monochrome_logs=true) { + def colorcodes = [:] as Map + + // Reset / Meta + colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" + colorcodes['bold'] = monochrome_logs ? '' : "\033[1m" + colorcodes['dim'] = monochrome_logs ? '' : "\033[2m" + colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m" + colorcodes['blink'] = monochrome_logs ? '' : "\033[5m" + colorcodes['reverse'] = monochrome_logs ? '' : "\033[7m" + colorcodes['hidden'] = monochrome_logs ? '' : "\033[8m" + + // Regular Colors + colorcodes['black'] = monochrome_logs ? '' : "\033[0;30m" + colorcodes['red'] = monochrome_logs ? '' : "\033[0;31m" + colorcodes['green'] = monochrome_logs ? '' : "\033[0;32m" + colorcodes['yellow'] = monochrome_logs ? '' : "\033[0;33m" + colorcodes['blue'] = monochrome_logs ? '' : "\033[0;34m" + colorcodes['purple'] = monochrome_logs ? '' : "\033[0;35m" + colorcodes['cyan'] = monochrome_logs ? '' : "\033[0;36m" + colorcodes['white'] = monochrome_logs ? '' : "\033[0;37m" + + // Bold + colorcodes['bblack'] = monochrome_logs ? '' : "\033[1;30m" + colorcodes['bred'] = monochrome_logs ? '' : "\033[1;31m" + colorcodes['bgreen'] = monochrome_logs ? '' : "\033[1;32m" + colorcodes['byellow'] = monochrome_logs ? '' : "\033[1;33m" + colorcodes['bblue'] = monochrome_logs ? '' : "\033[1;34m" + colorcodes['bpurple'] = monochrome_logs ? '' : "\033[1;35m" + colorcodes['bcyan'] = monochrome_logs ? '' : "\033[1;36m" + colorcodes['bwhite'] = monochrome_logs ? '' : "\033[1;37m" + + // Underline + colorcodes['ublack'] = monochrome_logs ? '' : "\033[4;30m" + colorcodes['ured'] = monochrome_logs ? '' : "\033[4;31m" + colorcodes['ugreen'] = monochrome_logs ? '' : "\033[4;32m" + colorcodes['uyellow'] = monochrome_logs ? '' : "\033[4;33m" + colorcodes['ublue'] = monochrome_logs ? '' : "\033[4;34m" + colorcodes['upurple'] = monochrome_logs ? '' : "\033[4;35m" + colorcodes['ucyan'] = monochrome_logs ? '' : "\033[4;36m" + colorcodes['uwhite'] = monochrome_logs ? '' : "\033[4;37m" + + // High Intensity + colorcodes['iblack'] = monochrome_logs ? '' : "\033[0;90m" + colorcodes['ired'] = monochrome_logs ? '' : "\033[0;91m" + colorcodes['igreen'] = monochrome_logs ? '' : "\033[0;92m" + colorcodes['iyellow'] = monochrome_logs ? '' : "\033[0;93m" + colorcodes['iblue'] = monochrome_logs ? '' : "\033[0;94m" + colorcodes['ipurple'] = monochrome_logs ? '' : "\033[0;95m" + colorcodes['icyan'] = monochrome_logs ? '' : "\033[0;96m" + colorcodes['iwhite'] = monochrome_logs ? '' : "\033[0;97m" + + // Bold High Intensity + colorcodes['biblack'] = monochrome_logs ? '' : "\033[1;90m" + colorcodes['bired'] = monochrome_logs ? '' : "\033[1;91m" + colorcodes['bigreen'] = monochrome_logs ? '' : "\033[1;92m" + colorcodes['biyellow'] = monochrome_logs ? '' : "\033[1;93m" + colorcodes['biblue'] = monochrome_logs ? '' : "\033[1;94m" + colorcodes['bipurple'] = monochrome_logs ? '' : "\033[1;95m" + colorcodes['bicyan'] = monochrome_logs ? 
'' : "\033[1;96m" + colorcodes['biwhite'] = monochrome_logs ? '' : "\033[1;97m" + + return colorcodes +} + +// Return a single report from an object that may be a Path or List +// +def getSingleReport(multiqc_reports) { + if (multiqc_reports instanceof Path) { + return multiqc_reports + } else if (multiqc_reports instanceof List) { + if (multiqc_reports.size() == 0) { + log.warn("[${workflow.manifest.name}] No reports found from process 'MULTIQC'") + return null + } else if (multiqc_reports.size() == 1) { + return multiqc_reports.first() + } else { + log.warn("[${workflow.manifest.name}] Found multiple reports from process 'MULTIQC', will use only one") + return multiqc_reports.first() + } + } else { + return null + } +} + +// +// Construct and send completion email +// +def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { + + // Set up the e-mail variables + def subject = "[${workflow.manifest.name}] Successful: ${workflow.runName}" + if (!workflow.success) { + subject = "[${workflow.manifest.name}] FAILED: ${workflow.runName}" + } + + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['Date Started'] = workflow.start + misc_fields['Date Completed'] = workflow.complete + misc_fields['Pipeline script file path'] = workflow.scriptFile + misc_fields['Pipeline script hash ID'] = workflow.scriptId + if (workflow.repository) { + misc_fields['Pipeline repository Git URL'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['Pipeline repository Git Commit'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['Pipeline Git branch/tag'] = workflow.revision + } + misc_fields['Nextflow Version'] = workflow.nextflow.version + misc_fields['Nextflow Build'] = workflow.nextflow.build + misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp + + def email_fields = [:] + email_fields['version'] = getWorkflowVersion() + email_fields['runName'] = workflow.runName + email_fields['success'] = workflow.success + email_fields['dateComplete'] = workflow.complete + email_fields['duration'] = workflow.duration + email_fields['exitStatus'] = workflow.exitStatus + email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + email_fields['errorReport'] = (workflow.errorReport ?: 'None') + email_fields['commandLine'] = workflow.commandLine + email_fields['projectDir'] = workflow.projectDir + email_fields['summary'] = summary << misc_fields + + // On success try attach the multiqc report + def mqc_report = getSingleReport(multiqc_report) + + // Check if we are only sending emails on failure + def email_address = email + if (!email && email_on_fail && !workflow.success) { + email_address = email_on_fail + } + + // Render the TXT template + def engine = new groovy.text.GStringTemplateEngine() + def tf = new File("${workflow.projectDir}/assets/email_template.txt") + def txt_template = engine.createTemplate(tf).make(email_fields) + def email_txt = txt_template.toString() + + // Render the HTML template + def hf = new File("${workflow.projectDir}/assets/email_template.html") + def html_template = engine.createTemplate(hf).make(email_fields) + def email_html = html_template.toString() + + // Render the sendmail template + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as MemoryUnit + def smail_fields = [email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes()] + def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt") + def sendmail_template = engine.createTemplate(sf).make(smail_fields) + def sendmail_html = sendmail_template.toString() + + // Send the HTML e-mail + def colors = logColours(monochrome_logs) as Map + if (email_address) { + try { + if (plaintext_email) { + new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') + } + // Try to send HTML e-mail using sendmail + def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html") + sendmail_tf.withWriter { w -> w << sendmail_html } + ['sendmail', '-t'].execute() << sendmail_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (sendmail)-") + } + catch (Exception msg) { + log.debug(msg.toString()) + log.debug("Trying with mail instead of sendmail") + // Catch failures and try with plaintext + def mail_cmd = ['mail', '-s', subject, '--content-type=text/html', email_address] + mail_cmd.execute() << email_html + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Sent summary e-mail to ${email_address} (mail)-") + } + } + + // Write summary e-mail HTML to a file + def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") + output_hf.withWriter { w -> w << email_html } + nextflow.extension.FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html") + output_hf.delete() + + // Write summary e-mail TXT to a file + def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") + output_tf.withWriter { w -> w << email_txt } + nextflow.extension.FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt") + output_tf.delete() +} + +// +// Print pipeline summary on completion +// +def completionSummary(monochrome_logs=true) { + def colors = logColours(monochrome_logs) as Map + if (workflow.success) { + if (workflow.stats.ignoredCount == 0) { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.green} Pipeline completed successfully${colors.reset}-") + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-") + } + } + else { + log.info("-${colors.purple}[${workflow.manifest.name}]${colors.red} Pipeline completed with errors${colors.reset}-") + } +} + +// +// Construct and send a notification to a web server as JSON e.g. 
Microsoft Teams and Slack +// +def imNotification(summary_params, hook_url) { + def summary = [:] + summary_params + .keySet() + .sort() + .each { group -> + summary << summary_params[group] + } + + def misc_fields = [:] + misc_fields['start'] = workflow.start + misc_fields['complete'] = workflow.complete + misc_fields['scriptfile'] = workflow.scriptFile + misc_fields['scriptid'] = workflow.scriptId + if (workflow.repository) { + misc_fields['repository'] = workflow.repository + } + if (workflow.commitId) { + misc_fields['commitid'] = workflow.commitId + } + if (workflow.revision) { + misc_fields['revision'] = workflow.revision + } + misc_fields['nxf_version'] = workflow.nextflow.version + misc_fields['nxf_build'] = workflow.nextflow.build + misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp + + def msg_fields = [:] + msg_fields['version'] = getWorkflowVersion() + msg_fields['runName'] = workflow.runName + msg_fields['success'] = workflow.success + msg_fields['dateComplete'] = workflow.complete + msg_fields['duration'] = workflow.duration + msg_fields['exitStatus'] = workflow.exitStatus + msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None') + msg_fields['errorReport'] = (workflow.errorReport ?: 'None') + msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "") + msg_fields['projectDir'] = workflow.projectDir + msg_fields['summary'] = summary << misc_fields + + // Render the JSON template + def engine = new groovy.text.GStringTemplateEngine() + // Different JSON depending on the service provider + // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format + def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json" + def hf = new File("${workflow.projectDir}/assets/${json_path}") + def json_template = engine.createTemplate(hf).make(msg_fields) + def json_message = json_template.toString() + + // POST + def post = new URL(hook_url).openConnection() + post.setRequestMethod("POST") + post.setDoOutput(true) + post.setRequestProperty("Content-Type", "application/json") + post.getOutputStream().write(json_message.getBytes("UTF-8")) + def postRC = post.getResponseCode() + if (!postRC.equals(200)) { + log.warn(post.getErrorStream().getText()) + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml new file mode 100644 index 00000000..d08d2434 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml @@ -0,0 +1,24 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "UTILS_NFCORE_PIPELINE" +description: Subworkflow with utility functions specific to the nf-core pipeline template +keywords: + - utility + - pipeline + - initialise + - version +components: [] +input: + - nextflow_cli_args: + type: list + description: | + Nextflow CLI positional arguments +output: + - success: + type: boolean + description: | + Dummy output to indicate success +authors: + - "@adamrtalbot" +maintainers: + - "@adamrtalbot" + - "@maxulysse" diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test new file mode 100644 index 00000000..f117040c --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test @@ -0,0 +1,126 @@ + +nextflow_function { + + name "Test Functions" + script "../main.nf" + config 
"subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Test Function checkConfigProvided") { + + function "checkConfigProvided" + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function checkProfileProvided") { + + function "checkProfileProvided" + + when { + function { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function without logColours") { + + function "logColours" + + when { + function { + """ + input[0] = true + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function with logColours") { + function "logColours" + + when { + function { + """ + input[0] = false + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert snapshot(function.result).match() } + ) + } + } + + test("Test Function getSingleReport with a single file") { + function "getSingleReport" + + when { + function { + """ + input[0] = file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") } + ) + } + } + + test("Test Function getSingleReport with multiple files") { + function "getSingleReport" + + when { + function { + """ + input[0] = [ + file(params.modules_testdata_base_path + '/generic/tsv/test.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/network.tsv', checkIfExists: true), + file(params.modules_testdata_base_path + '/generic/tsv/expression.tsv', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert function.success }, + { assert function.result.contains("test.tsv") }, + { assert !function.result.contains("network.tsv") }, + { assert !function.result.contains("expression.tsv") } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap new file mode 100644 index 00000000..02c67014 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap @@ -0,0 +1,136 @@ +{ + "Test Function checkProfileProvided": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:03.360873" + }, + "Test Function checkConfigProvided": { + "content": [ + true + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:02:59.729647" + }, + "Test Function without logColours": { + "content": [ + { + "reset": "", + "bold": "", + "dim": "", + "underlined": "", + "blink": "", + "reverse": "", + "hidden": "", + "black": "", + "red": "", + "green": "", + "yellow": "", + "blue": "", + "purple": "", + "cyan": "", + "white": "", + "bblack": "", + "bred": "", + "bgreen": "", + "byellow": "", + "bblue": "", + "bpurple": "", + "bcyan": "", + "bwhite": "", + "ublack": "", + "ured": "", + "ugreen": "", + "uyellow": "", + "ublue": "", + "upurple": "", + "ucyan": "", + "uwhite": "", + "iblack": "", + "ired": "", + "igreen": "", + "iyellow": "", + "iblue": "", + "ipurple": "", + "icyan": "", + "iwhite": "", + "biblack": "", + "bired": "", + "bigreen": 
"", + "biyellow": "", + "biblue": "", + "bipurple": "", + "bicyan": "", + "biwhite": "" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:17.969323" + }, + "Test Function with logColours": { + "content": [ + { + "reset": "\u001b[0m", + "bold": "\u001b[1m", + "dim": "\u001b[2m", + "underlined": "\u001b[4m", + "blink": "\u001b[5m", + "reverse": "\u001b[7m", + "hidden": "\u001b[8m", + "black": "\u001b[0;30m", + "red": "\u001b[0;31m", + "green": "\u001b[0;32m", + "yellow": "\u001b[0;33m", + "blue": "\u001b[0;34m", + "purple": "\u001b[0;35m", + "cyan": "\u001b[0;36m", + "white": "\u001b[0;37m", + "bblack": "\u001b[1;30m", + "bred": "\u001b[1;31m", + "bgreen": "\u001b[1;32m", + "byellow": "\u001b[1;33m", + "bblue": "\u001b[1;34m", + "bpurple": "\u001b[1;35m", + "bcyan": "\u001b[1;36m", + "bwhite": "\u001b[1;37m", + "ublack": "\u001b[4;30m", + "ured": "\u001b[4;31m", + "ugreen": "\u001b[4;32m", + "uyellow": "\u001b[4;33m", + "ublue": "\u001b[4;34m", + "upurple": "\u001b[4;35m", + "ucyan": "\u001b[4;36m", + "uwhite": "\u001b[4;37m", + "iblack": "\u001b[0;90m", + "ired": "\u001b[0;91m", + "igreen": "\u001b[0;92m", + "iyellow": "\u001b[0;93m", + "iblue": "\u001b[0;94m", + "ipurple": "\u001b[0;95m", + "icyan": "\u001b[0;96m", + "iwhite": "\u001b[0;97m", + "biblack": "\u001b[1;90m", + "bired": "\u001b[1;91m", + "bigreen": "\u001b[1;92m", + "biyellow": "\u001b[1;93m", + "biblue": "\u001b[1;94m", + "bipurple": "\u001b[1;95m", + "bicyan": "\u001b[1;96m", + "biwhite": "\u001b[1;97m" + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:21.714424" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test new file mode 100644 index 00000000..8940d32d --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test @@ -0,0 +1,29 @@ +nextflow_workflow { + + name "Test Workflow UTILS_NFCORE_PIPELINE" + script "../main.nf" + config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config" + workflow "UTILS_NFCORE_PIPELINE" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "utils_nfcore_pipeline" + tag "subworkflows/utils_nfcore_pipeline" + + test("Should run without failures") { + + when { + workflow { + """ + input[0] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap new file mode 100644 index 00000000..859d1030 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap @@ -0,0 +1,19 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + true + ], + "valid_config": [ + true + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T12:03:25.726491" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config new file mode 100644 index 00000000..d0a926bf --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config @@ -0,0 +1,9 @@ +manifest { + name = 'nextflow_workflow' + author = """nf-core""" + homePage = 'https://127.0.0.1' + description = """Dummy 
pipeline""" + nextflowVersion = '!>=23.04.0' + version = '9.9.9' + doi = 'https://doi.org/10.5281/zenodo.5070524' +} diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml new file mode 100644 index 00000000..ac8523c9 --- /dev/null +++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/utils_nfcore_pipeline: + - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf new file mode 100644 index 00000000..4994303e --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow that uses the nf-schema plugin to validate parameters and render the parameter summary +// + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { validateParameters } from 'plugin/nf-schema' + +workflow UTILS_NFSCHEMA_PLUGIN { + + take: + input_workflow // workflow: the workflow object used by nf-schema to get metadata from the workflow + validate_params // boolean: validate the parameters + parameters_schema // string: path to the parameters JSON schema. + // this has to be the same as the schema given to `validation.parametersSchema` + // when this input is empty it will automatically use the configured schema or + // "${projectDir}/nextflow_schema.json" as default. This input should not be empty + // for meta pipelines + + main: + + // + // Print parameter summary to stdout. This will display the parameters + // that differ from the default given in the JSON schema + // + if(parameters_schema) { + log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) + } else { + log.info paramsSummaryLog(input_workflow) + } + + // + // Validate the parameters using nextflow_schema.json or the schema + // given via the validation.parametersSchema configuration option + // + if(validate_params) { + if(parameters_schema) { + validateParameters(parameters_schema:parameters_schema) + } else { + validateParameters() + } + } + + emit: + dummy_emit = true +} + diff --git a/subworkflows/nf-core/utils_nfschema_plugin/meta.yml b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml new file mode 100644 index 00000000..f7d9f028 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/meta.yml @@ -0,0 +1,35 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "utils_nfschema_plugin" +description: Run nf-schema to validate parameters and create a summary of changed parameters +keywords: + - validation + - JSON schema + - plugin + - parameters + - summary +components: [] +input: + - input_workflow: + type: object + description: | + The workflow object of the used pipeline. + This object contains meta data used to create the params summary log + - validate_params: + type: boolean + description: Validate the parameters and error if invalid. + - parameters_schema: + type: string + description: | + Path to the parameters JSON schema. + This has to be the same as the schema given to the `validation.parametersSchema` config + option. When this input is empty it will automatically use the configured schema or + "${projectDir}/nextflow_schema.json" as default. The schema should not be given in this way + for meta pipelines. 
+output: + - dummy_emit: + type: boolean + description: Dummy emit to make nf-core subworkflows lint happy +authors: + - "@nvnieuwk" +maintainers: + - "@nvnieuwk" diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test new file mode 100644 index 00000000..8fb30164 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -0,0 +1,117 @@ +nextflow_workflow { + + name "Test Subworkflow UTILS_NFSCHEMA_PLUGIN" + script "../main.nf" + workflow "UTILS_NFSCHEMA_PLUGIN" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/utils_nfschema_plugin" + tag "plugin/nf-schema" + + config "./nextflow.config" + + test("Should run nothing") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } + + test("Should run nothing - custom schema") { + + when { + + params { + test_data = '' + } + + workflow { + """ + validate_params = false + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } + + test("Should validate params - custom schema") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + """ + } + } + + then { + assertAll( + { assert workflow.failed }, + { assert workflow.stdout.any { it.contains('ERROR ~ Validation of pipeline parameters failed!') } } + ) + } + } +} diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config new file mode 100644 index 00000000..0907ac58 --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -0,0 +1,8 @@ +plugins { + id "nf-schema@2.1.0" +} + +validation { + parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + monochromeLogs = true +} \ No newline at end of file diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json new file mode 100644 index 00000000..331e0d2f --- /dev/null +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json @@ -0,0 +1,96 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json", + "title": ". 
pipeline parameters", + "description": "", + "type": "object", + "$defs": { + "input_output_options": { + "title": "Input/output options", + "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], + "properties": { + "validate_params": { + "type": "boolean", + "description": "Validate parameters?", + "default": true, + "hidden": true + }, + "outdir": { + "type": "string", + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" + }, + "test_data_base": { + "type": "string", + "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules", + "description": "Base for test data directory", + "hidden": true + }, + "test_data": { + "type": "string", + "description": "Fake test data param", + "hidden": true + } + } + }, + "generic_options": { + "title": "Generic options", + "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", + "properties": { + "help": { + "type": "boolean", + "description": "Display help text.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "version": { + "type": "boolean", + "description": "Display version and exit.", + "fa_icon": "fas fa-question-circle", + "hidden": true + }, + "logo": { + "type": "boolean", + "default": true, + "description": "Display nf-core logo in console output.", + "fa_icon": "fas fa-image", + "hidden": true + }, + "singularity_pull_docker_container": { + "type": "boolean", + "description": "Pull Singularity container from Docker?", + "hidden": true + }, + "publish_dir_mode": { + "type": "string", + "default": "copy", + "description": "Method used to save pipeline results to output directory.", + "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], + "hidden": true + }, + "monochrome_logs": { + "type": "boolean", + "description": "Use monochrome_logs", + "hidden": true + } + } + } + }, + "allOf": [ + { + "$ref": "#/$defs/input_output_options" + }, + { + "$ref": "#/$defs/generic_options" + } + ] +} diff --git a/workflows/blobtoolkit.nf b/workflows/blobtoolkit.nf index cd97fd99..a0de4799 100644 --- a/workflows/blobtoolkit.nf +++ b/workflows/blobtoolkit.nf @@ -1,52 +1,3 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -WorkflowBlobtoolkit.initialise(params, log) - -// Add all file path parameters for the pipeline to the list below -// Check input path parameters to see if they exist -def checkPathParamList = [ params.input, params.multiqc_config, params.fasta, params.taxdump, params.busco, params.blastp, params.blastx, params.lineage_tax_ids ] -for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } - -// Check mandatory parameters -if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } -if (params.fasta) { ch_fasta = Channel.value([ [ 'id': params.accession ?: file(params.fasta.replace(".gz", "")).baseName ], file(params.fasta) ]) } else { exit 1, 'Genome fasta file must be specified!' } -if (params.taxon) { ch_taxon = Channel.value(params.taxon) } else { exit 1, 'NCBI Taxon ID not specified!' } -if (params.blastp) { ch_blastp = Channel.value([ [ 'id': file(params.blastp).baseName ], params.blastp ]) } else { exit 1, 'Diamond BLASTp database must be specified!' } -if (params.blastx) { ch_blastx = Channel.value([ [ 'id': file(params.blastx).baseName ], params.blastx ]) } else { exit 1, 'Diamond BLASTx database must be specified!' } -if (params.blastn) { ch_blastn = Channel.value([ [ 'id': file(params.blastn).baseName ], params.blastn ]) } else { exit 1, 'BLASTn database not specified!' } -if (params.taxdump) { ch_taxdump = file(params.taxdump) } else { exit 1, 'NCBI Taxonomy database not specified!' } -if (params.fetchngs_samplesheet && !params.align) { exit 1, '--align not specified, even though the input samplesheet is a nf-core/fetchngs one - i.e has fastq files!' 
} - -if (params.lineage_tax_ids) { ch_lineage_tax_ids = Channel.fromPath(params.lineage_tax_ids).first() } else { exit 1, 'Mapping BUSCO lineage <-> taxon_ids not specified' } - -// Create channel for optional parameters -if (params.busco_lineages) { ch_busco_lin = Channel.value(params.busco_lineages) } else { ch_busco_lin = Channel.value([]) } -if (params.busco) { ch_busco_db = Channel.fromPath(params.busco).first() } else { ch_busco_db = Channel.value([]) } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -73,12 +24,11 @@ include { FINALISE_BLOBDIR } from '../subworkflows/local/finalise_blobdir' IMPORT NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ - -// -// MODULE: Installed directly from nf-core/modules -// -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_blobtoolkit_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -86,29 +36,34 @@ include { MULTIQC } from '../modules/nf-core/multiqc/main' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow BLOBTOOLKIT { - ch_versions = Channel.empty() + take: + ch_fasta + ch_databases + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() // // SUBWORKFLOW: Prepare genome for downstream processing // PREPARE_GENOME ( ch_fasta ) ch_versions = ch_versions.mix ( PREPARE_GENOME.out.versions ) + // Reference genome to be used (as a value channel) throughout the pipeline + ch_prepared_genome = PREPARE_GENOME.out.genome.first() + // // SUBWORKFLOW: Check samplesheet and create channels for downstream analysis // INPUT_CHECK ( - ch_input, - PREPARE_GENOME.out.genome, - ch_taxon, - ch_busco_lin, - ch_lineage_tax_ids, - ch_blastn, + params.input, + ch_prepared_genome, + params.taxon, + Channel.value(params.busco_lineages ?: []), + params.lineage_tax_ids, + ch_databases, ) ch_versions = ch_versions.mix ( INPUT_CHECK.out.versions ) @@ -116,7 +71,7 @@ workflow BLOBTOOLKIT { // SUBWORKFLOW: Optional read alignment // if ( params.align ) { - MINIMAP2_ALIGNMENT ( 
INPUT_CHECK.out.reads, PREPARE_GENOME.out.genome ) + MINIMAP2_ALIGNMENT ( INPUT_CHECK.out.reads, ch_prepared_genome ) ch_versions = ch_versions.mix ( MINIMAP2_ALIGNMENT.out.versions ) ch_aligned = MINIMAP2_ALIGNMENT.out.aln } else { @@ -126,18 +81,19 @@ workflow BLOBTOOLKIT { // // SUBWORKFLOW: Calculate genome coverage and statistics // - COVERAGE_STATS ( ch_aligned, PREPARE_GENOME.out.genome ) + COVERAGE_STATS ( ch_aligned, ch_prepared_genome ) ch_versions = ch_versions.mix ( COVERAGE_STATS.out.versions ) // // SUBWORKFLOW: Run BUSCO using lineages fetched from GoaT, then run diamond_blastp // BUSCO_DIAMOND ( - PREPARE_GENOME.out.genome, + ch_prepared_genome, INPUT_CHECK.out.busco_lineages, - ch_busco_db, - ch_blastp, + INPUT_CHECK.out.busco_db, + INPUT_CHECK.out.blastp, INPUT_CHECK.out.taxon_id, + INPUT_CHECK.out.precomputed_busco, ) ch_versions = ch_versions.mix ( BUSCO_DIAMOND.out.versions ) @@ -145,9 +101,9 @@ workflow BLOBTOOLKIT { // SUBWORKFLOW: Diamond blastx search of assembly contigs against the UniProt reference proteomes // RUN_BLASTX ( - PREPARE_GENOME.out.genome, + ch_prepared_genome, BUSCO_DIAMOND.out.first_table, - ch_blastx, + INPUT_CHECK.out.blastx, INPUT_CHECK.out.taxon_id, ) ch_versions = ch_versions.mix ( RUN_BLASTX.out.versions ) @@ -158,8 +114,8 @@ workflow BLOBTOOLKIT { // RUN_BLASTN ( RUN_BLASTX.out.blastx_out, - PREPARE_GENOME.out.genome, - ch_blastn, + ch_prepared_genome, + INPUT_CHECK.out.blastn, INPUT_CHECK.out.taxon_id, ) @@ -180,12 +136,14 @@ workflow BLOBTOOLKIT { // BLOBTOOLS ( INPUT_CHECK.out.config, + INPUT_CHECK.out.synonyms_tsv.ifEmpty([[],[]]), + INPUT_CHECK.out.categories_tsv.ifEmpty([[],[]]), COLLATE_STATS.out.window_tsv, BUSCO_DIAMOND.out.all_tables, BUSCO_DIAMOND.out.blastp_txt.ifEmpty([[],[]]), RUN_BLASTX.out.blastx_out.ifEmpty([[],[]]), RUN_BLASTN.out.blastn_out.ifEmpty([[],[]]), - ch_taxdump + INPUT_CHECK.out.taxdump ) ch_versions = ch_versions.mix ( BLOBTOOLS.out.versions ) @@ -196,19 +154,23 @@ workflow BLOBTOOLKIT { ch_versions = ch_versions.mix(VIEW.out.versions) // - // MODULE: Combine different versions.yml + // Collate and save software versions // - CUSTOM_DUMPSOFTWAREVERSIONS ( - ch_versions.unique().collectFile(name: 'collated_versions.yml') - ) + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info/blobtoolkit", + name: 'blobtoolkit_software_' + 'mqc_' + 'versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + // // SUBWORKFLOW: Finalise and publish the blobdir // FINALISE_BLOBDIR ( BLOBTOOLS.out.blobdir, - INPUT_CHECK.out.reads.collect(flat: false).ifEmpty([]), - CUSTOM_DUMPSOFTWAREVERSIONS.out.yml, + ch_collated_versions, VIEW.out.summary ) // Don't update ch_versions because it's already been consumed by now @@ -217,42 +179,49 @@ workflow BLOBTOOLKIT { // // MODULE: MultiQC // - workflow_summary = WorkflowBlobtoolkit.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) - - methods_description = WorkflowBlobtoolkit.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) + ch_multiqc_config = Channel.fromPath( + "$projectDir/assets/multiqc_config.yml", checkIfExists: true) + ch_multiqc_custom_config = params.multiqc_config ? + Channel.fromPath(params.multiqc_config, checkIfExists: true) : + Channel.empty() + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: true + ) + ) - ch_multiqc_files = Channel.empty() - ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) - ch_multiqc_files = ch_multiqc_files.mix(BUSCO_DIAMOND.out.multiqc.collect{it[1]}.ifEmpty([])) - ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_files = ch_multiqc_files.mix(BUSCO_DIAMOND.out.multiqc.collect().ifEmpty([])) MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList() + ch_multiqc_logo.toList(), + [], + [] ) - multiqc_report = MULTIQC.out.report.toList() -} -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - COMPLETION EMAIL AND SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] -workflow.onComplete { - if (params.email || params.email_on_fail) { - NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report) - } - NfcoreTemplate.dump_parameters(workflow, params) - NfcoreTemplate.summary(workflow, params, log) - if (params.hook_url) { - NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log) - } } /*
    Process Name \\", - " \\ Software Version
    CUSTOM_DUMPSOFTWAREVERSIONSpython3.11.7
    yaml5.4.1
    TOOL1tool10.11.9
    TOOL2tool21.9
    WorkflowNextflow
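For reviewers, a minimal sketch of how the refactored `BLOBTOOLKIT` workflow might now be driven: it no longer reads `params` or calls `exit 1` itself, but receives its inputs through `take:` and reports through `emit:`. The entry-workflow shape and the composition of `ch_databases` below are assumptions for illustration only; the real wiring lives in the `utils_nfcore_blobtoolkit_pipeline` initialisation subworkflow, which is not shown in this hunk.

```nextflow
// Illustrative caller only; ch_databases is a hypothetical grouping, since its
// real composition is resolved by INPUT_CHECK and is not visible in this diff.
include { BLOBTOOLKIT } from './workflows/blobtoolkit'

workflow {
    // Value channel of [ meta, fasta ], mirroring the removed params.fasta check
    ch_fasta = Channel.value([
        [ id: params.accession ?: file(params.fasta.replace(".gz", "")).baseName ],
        file(params.fasta)
    ])

    // Grouped database inputs (hypothetical structure)
    ch_databases = Channel.value([
        blastp : params.blastp,
        blastx : params.blastx,
        blastn : params.blastn,
        taxdump: params.taxdump
    ])

    BLOBTOOLKIT ( ch_fasta, ch_databases )
}
```

Pushing the `params` checks out of the workflow body is what makes the nf-test cases above possible: the workflow can now be exercised with arbitrary test channels instead of a full parameter set.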
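Similarly, the new `UTILS_NFSCHEMA_PLUGIN` subworkflow added earlier in this diff is designed to be called once during pipeline initialisation. A minimal sketch, assuming the `nf-schema` plugin is enabled in `nextflow.config` (as in the test config above); the wrapper workflow name is hypothetical:

```nextflow
// Sketch: invoking UTILS_NFSCHEMA_PLUGIN from an initialisation wrapper.
include { UTILS_NFSCHEMA_PLUGIN } from './subworkflows/nf-core/utils_nfschema_plugin'

workflow PIPELINE_INITIALISATION {   // hypothetical name, not part of this diff
    main:
    UTILS_NFSCHEMA_PLUGIN (
        workflow,               // workflow metadata object, used for the summary log
        params.validate_params, // boolean: run validateParameters() or skip it
        ""                      // empty: fall back to the configured schema
    )
}
```

Passing an empty string as the third input makes the subworkflow fall back to the schema set via `validation.parametersSchema`, or to `${projectDir}/nextflow_schema.json` by default; the `dummy_emit` output exists only to satisfy nf-core subworkflow linting and does not need to be consumed.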