From aea5c83cf68fbc3dc9d6d958aa59e81687880cc4 Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Fri, 4 Mar 2022 15:44:12 +0100
Subject: [PATCH 01/58] Bump pipeline version to 2.5dev

---
 CHANGELOG.md    | 6 ++++++
 nextflow.config | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff1122c8..7ef04971 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,12 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [Unpublished Version / DEV]
+
+### Enhancements & fixes
+
+### Parameters
+
 ## [[2.4.1](https://github.com/nf-core/viralrecon/releases/tag/2.4.1)] - 2022-03-01

 ### Enhancements & fixes
diff --git a/nextflow.config b/nextflow.config
index 6bceabfd..9e2e9760 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -235,7 +235,7 @@ manifest {
     description     = 'Assembly and intrahost/low-frequency variant calling for viral samples'
     mainScript      = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version         = '2.4.1'
+    version         = '2.5dev'
 }

 // Load modules.config for DSL2 module specific options

From 98aba35ffc10258308333dbfa20947288a9c5177 Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Tue, 15 Mar 2022 21:06:09 +0000
Subject: [PATCH 02/58] Template update for nf-core/tools version 2.3

---
 .github/ISSUE_TEMPLATE/bug_report.yml |   1 -
 .github/PULL_REQUEST_TEMPLATE.md      |   2 +-
 .github/workflows/awsfulltest.yml     |   6 +-
 .github/workflows/awstest.yml         |   6 +-
 .github/workflows/ci.yml              |   4 +-
 .github/workflows/linting.yml         |  28 +--
 .gitpod.yml                           |  14 ++
 .nf-core.yml                          |   1 +
 .yamllint.yml                         |   6 +
 CHANGELOG.md                          |   2 +-
 README.md                             |   6 +-
 bin/check_samplesheet.py              | 346 +++++++++++++++++---------
 conf/base.config                      |   4 +-
 conf/igenomes.config                  |  80 +++---
 conf/modules.config                   |  18 +-
 conf/test.config                      |   6 +-
 conf/test_full.config                 |   6 +-
 docs/usage.md                         |  21 +-
 lib/NfcoreSchema.groovy               |   4 +-
 lib/Utils.groovy                      |   4 +-
 lib/WorkflowViralrecon.groovy         |   4 +-
 main.nf                               |  24 +-
 nextflow.config                       |  20 +-
 nextflow_schema.json                  |  23 +-
 subworkflows/local/input_check.nf     |  18 +-
 workflows/viralrecon.nf               |  28 +--
 26 files changed, 421 insertions(+), 261 deletions(-)
 create mode 100644 .gitpod.yml
 create mode 100644 .nf-core.yml
 create mode 100644 .yamllint.yml

diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index f332a0b2..b5338dc4 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -1,4 +1,3 @@
-
 name: Bug report
 description: Report something that is broken or incorrect
 labels: bug
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 0536b7fb..063def28 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -19,7 +19,7 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vira
 - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md)
 - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`).
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
- [ ] Output Documentation in `docs/output.md` is updated. - [ ] `CHANGELOG.md` is updated. diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index ac4daba8..42a8c39b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -14,7 +14,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v2 + uses: nf-core/tower-action@v3 # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters @@ -31,4 +31,6 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}" } profiles: test_full,aws_tower - pre_run_script: 'export NXF_VER=21.10.3' + nextflow_config: | + process.errorStrategy = 'retry' + process.maxRetries = 3 diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index 2206ce65..dc710981 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Launch workflow via tower - uses: nf-core/tower-action@v2 + uses: nf-core/tower-action@v3 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} @@ -25,4 +25,6 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-test-${{ github.sha }}" } profiles: test,aws_tower - pre_run_script: 'export NXF_VER=21.10.3' + nextflow_config: | + process.errorStrategy = 'retry' + process.maxRetries = 3 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 73360662..4b012729 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ env: jobs: test: - name: Run workflow tests + name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} runs-on: ubuntu-latest @@ -47,4 +47,4 @@ jobs: # For example: adding multiple test runs with different parameters # Remember that you can parallelise this by using strategy.matrix run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 3b448773..fda934c0 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -12,9 +12,7 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' + - uses: actions/setup-node@v2 - name: Install markdownlint run: npm install -g markdownlint-cli - name: Run Markdownlint @@ -51,9 +49,7 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' + - uses: actions/setup-node@v2 - name: Install editorconfig-checker run: npm install -g editorconfig-checker @@ -64,14 +60,13 @@ jobs: YAML: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 + - name: Checkout + uses: actions/checkout@master + - name: 'Yamllint' + uses: karancode/yamllint-github-action@master with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml" -o -name "*.yaml") + yamllint_file_or_dir: '.' 
+          yamllint_config_filepath: '.yamllint.yml'

       # If the above check failed, post a comment on the PR explaining the failure
       - name: Post PR comment

             To keep the code consistent with lots of contributors, we run automated code consistency checks.
             To fix this CI test, please run:

-            * Install `yaml-lint`
-              * [Install `npm`](https://www.npmjs.com/get-npm) then [install `yaml-lint`](https://www.npmjs.com/package/yaml-lint) (`npm install -g yaml-lint`)
+            * Install `yamllint`
+              * Install `yamllint` following [these](https://yamllint.readthedocs.io/en/stable/quickstart.html#installing-yamllint)
+                instructions, or alternatively install it in your [conda environment](https://anaconda.org/conda-forge/yamllint)
             * Fix the YAML errors
-              * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml")`
+              * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml") -c ./.yamllint.yml`
             * Fix any reported errors in your YAML files

             Once you push these changes the test should pass, and you can hide this comment :+1:

diff --git a/.gitpod.yml b/.gitpod.yml
new file mode 100644
index 00000000..b7d4cee1
--- /dev/null
+++ b/.gitpod.yml
@@ -0,0 +1,14 @@
+image: nfcore/gitpod:latest
+
+vscode:
+  extensions: # based on nf-core.nf-core-extensionpack
+    - codezombiech.gitignore # Language support for .gitignore files
+    # - cssho.vscode-svgviewer # SVG viewer
+    - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code
+    - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
+    - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
+    - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
+    - mechatroner.rainbow-csv # Highlight columns in csv files in different colors
+    # - nextflow.nextflow # Nextflow syntax highlighting
+    - oderwat.indent-rainbow # Highlight indentation level
+    - streetsidesoftware.code-spell-checker # Spelling checker for source code
diff --git a/.nf-core.yml b/.nf-core.yml
new file mode 100644
index 00000000..3805dc81
--- /dev/null
+++ b/.nf-core.yml
@@ -0,0 +1 @@
+repository_type: pipeline
diff --git a/.yamllint.yml b/.yamllint.yml
new file mode 100644
index 00000000..d466deec
--- /dev/null
+++ b/.yamllint.yml
@@ -0,0 +1,6 @@
+extends: default
+
+rules:
+  document-start: disable
+  line-length: disable
+  truthy: disable
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 44fb15b7..400db1d6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,7 +3,7 @@
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

-## v2.3dev - [date]
+## v2.5dev - [date]

 Initial release of nf-core/viralrecon, created with the [nf-core](https://nf-co.re/) template.
diff --git a/README.md b/README.md
index 1c3cde14..93e38252 100644
--- a/README.md
+++ b/README.md
@@ -40,14 +40,14 @@ On release, automated continuous integration tests run the pipeline on a full-si
 3. Download the pipeline and test it on a minimal dataset with a single command:

    ```console
-   nextflow run nf-core/viralrecon -profile test,YOURPROFILE
+   nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir <OUTDIR>
    ```

   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above).
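   For example, substituting `docker` for `YOURPROFILE` (assuming Docker is available on your system, and with `<OUTDIR>` a path of your choosing) gives a complete test invocation:

   ```console
   nextflow run nf-core/viralrecon -profile test,docker --outdir <OUTDIR>
   ```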
   You can chain multiple config profiles in a comma-separated string.

   > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
   > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-   > * If you are using `singularity` and are persistently observing issues downloading Singularity images directly due to timeout or network issues, then you can use the `--singularity_pull_docker_container` parameter to pull and convert the Docker image instead. Alternatively, you can use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
+   > * If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
   > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.

4. Start running your own analysis!

   ```console
-   nextflow run nf-core/viralrecon -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> --input samplesheet.csv --genome GRCh37
+   nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
   ```

## Documentation
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 03567447..5473b624 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -1,145 +1,249 @@
 #!/usr/bin/env python

-# TODO nf-core: Update the script to check the samplesheet
-# This script is based on the example at: https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv
-import os
-import sys
-import errno
+"""Provide a command line tool to validate and transform tabular samplesheets."""
+
+
 import argparse
+import csv
+import logging
+import sys
+from collections import Counter
+from pathlib import Path

-def parse_args(args=None):
-    Description = "Reformat nf-core/viralrecon samplesheet file and check its contents."
-    Epilog = "Example usage: python check_samplesheet.py <FILE_IN> <FILE_OUT>"
+logger = logging.getLogger()

-    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
-    parser.add_argument("FILE_IN", help="Input samplesheet file.")
-    parser.add_argument("FILE_OUT", help="Output file.")
-    return parser.parse_args(args)

+class RowChecker:
+    """
+    Define a service that can validate and transform each given row.

-def make_dir(path):
-    if len(path) > 0:
-        try:
-            os.makedirs(path)
-        except OSError as exception:
-            if exception.errno != errno.EEXIST:
-                raise exception
+    Attributes:
+        modified (list): A list of dicts, where each dict corresponds to a previously
+            validated and transformed row. The order of rows is maintained.

+    """

-def print_error(error, context="Line", context_str=""):
-    error_str = "ERROR: Please check samplesheet -> {}".format(error)
-    if context != "" and context_str != "":
-        error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format(
-            error, context.strip(), context_str.strip()
+    VALID_FORMATS = (
+        ".fq.gz",
+        ".fastq.gz",
+    )
+
+    def __init__(
+        self,
+        sample_col="sample",
+        first_col="fastq_1",
+        second_col="fastq_2",
+        single_col="single_end",
+        **kwargs,
+    ):
+        """
+        Initialize the row checker with the expected column names.
+
+        Args:
+            sample_col (str): The name of the column that contains the sample name
+                (default "sample").
+            first_col (str): The name of the column that contains the first (or only)
+                FASTQ file path (default "fastq_1").
+            second_col (str): The name of the column that contains the second (if any)
+                FASTQ file path (default "fastq_2").
+            single_col (str): The name of the new column that will be inserted and
+                records whether the sample contains single- or paired-end sequencing
+                reads (default "single_end").
+
+        """
+        super().__init__(**kwargs)
+        self._sample_col = sample_col
+        self._first_col = first_col
+        self._second_col = second_col
+        self._single_col = single_col
+        self._seen = set()
+        self.modified = []
+
+    def validate_and_transform(self, row):
+        """
+        Perform all validations on the given row and insert the read pairing status.
+
+        Args:
+            row (dict): A mapping from column headers (keys) to elements of that row
+                (values).
+
+        """
+        self._validate_sample(row)
+        self._validate_first(row)
+        self._validate_second(row)
+        self._validate_pair(row)
+        self._seen.add((row[self._sample_col], row[self._first_col]))
+        self.modified.append(row)
+
+    def _validate_sample(self, row):
+        """Assert that the sample name exists and convert spaces to underscores."""
+        assert len(row[self._sample_col]) > 0, "Sample input is required."
+        # Sanitize samples slightly.
+        row[self._sample_col] = row[self._sample_col].replace(" ", "_")
+
+    def _validate_first(self, row):
+        """Assert that the first FASTQ entry is non-empty and has the right format."""
+        assert len(row[self._first_col]) > 0, "At least the first FASTQ file is required."
+        self._validate_fastq_format(row[self._first_col])
+
+    def _validate_second(self, row):
+        """Assert that the second FASTQ entry has the right format if it exists."""
+        if len(row[self._second_col]) > 0:
+            self._validate_fastq_format(row[self._second_col])
+
+    def _validate_pair(self, row):
+        """Assert that read pairs have the same file extension. Report pair status."""
+        if row[self._first_col] and row[self._second_col]:
+            row[self._single_col] = False
+            assert (
+                Path(row[self._first_col]).suffixes == Path(row[self._second_col]).suffixes
+            ), "FASTQ pairs must have the same file extensions."
+ else: + row[self._single_col] = True + + def _validate_fastq_format(self, filename): + """Assert that a given filename has one of the expected FASTQ extensions.""" + assert any(filename.endswith(extension) for extension in self.VALID_FORMATS), ( + f"The FASTQ file has an unrecognized extension: {filename}\n" + f"It should be one of: {', '.join(self.VALID_FORMATS)}" ) - print(error_str) - sys.exit(1) + def validate_unique_samples(self): + """ + Assert that the combination of sample name and FASTQ filename is unique. + + In addition to the validation, also rename the sample if more than one sample, + FASTQ file combination exists. + + """ + assert len(self._seen) == len(self.modified), "The pair of sample name and FASTQ must be unique." + if len({pair[0] for pair in self._seen}) < len(self._seen): + counts = Counter(pair[0] for pair in self._seen) + seen = Counter() + for row in self.modified: + sample = row[self._sample_col] + seen[sample] += 1 + if counts[sample] > 1: + row[self._sample_col] = f"{sample}_T{seen[sample]}" + + +def sniff_format(handle): + """ + Detect the tabular format. + + Args: + handle (text file): A handle to a `text file`_ object. The read position is + expected to be at the beginning (index 0). + + Returns: + csv.Dialect: The detected tabular format. + + .. _text file: + https://docs.python.org/3/glossary.html#term-text-file -# TODO nf-core: Update the check_samplesheet function -def check_samplesheet(file_in, file_out): """ - This function checks that the samplesheet follows the following structure: + peek = handle.read(2048) + sniffer = csv.Sniffer() + if not sniffer.has_header(peek): + logger.critical(f"The given sample sheet does not appear to contain a header.") + sys.exit(1) + dialect = sniffer.sniff(peek) + handle.seek(0) + return dialect - sample,fastq_1,fastq_2 - SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz - SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz - SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, - For an example see: - https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv +def check_samplesheet(file_in, file_out): """ + Check that the tabular samplesheet has the structure expected by nf-core pipelines. - sample_mapping_dict = {} - with open(file_in, "r") as fin: + Validate the general shape of the table, expected columns, and each row. Also add + an additional column which records whether one or two FASTQ reads were found. - ## Check header - MIN_COLS = 2 - # TODO nf-core: Update the column names for the input samplesheet - HEADER = ["sample", "fastq_1", "fastq_2"] - header = [x.strip('"') for x in fin.readline().strip().split(",")] - if header[: len(HEADER)] != HEADER: - print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER))) - sys.exit(1) + Args: + file_in (pathlib.Path): The given tabular samplesheet. The format can be either + CSV, TSV, or any other format automatically recognized by ``csv.Sniffer``. + file_out (pathlib.Path): Where the validated and transformed samplesheet should + be created; always in CSV format. 
- ## Check sample entries - for line in fin: - lspl = [x.strip().strip('"') for x in line.strip().split(",")] - - # Check valid number of columns per row - if len(lspl) < len(HEADER): - print_error( - "Invalid number of columns (minimum = {})!".format(len(HEADER)), - "Line", - line, - ) - num_cols = len([x for x in lspl if x]) - if num_cols < MIN_COLS: - print_error( - "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), - "Line", - line, - ) - - ## Check sample name entries - sample, fastq_1, fastq_2 = lspl[: len(HEADER)] - sample = sample.replace(" ", "_") - if not sample: - print_error("Sample entry has not been specified!", "Line", line) - - ## Check FastQ file extension - for fastq in [fastq_1, fastq_2]: - if fastq: - if fastq.find(" ") != -1: - print_error("FastQ file contains spaces!", "Line", line) - if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): - print_error( - "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", - "Line", - line, - ) - - ## Auto-detect paired-end/single-end - sample_info = [] ## [single_end, fastq_1, fastq_2] - if sample and fastq_1 and fastq_2: ## Paired-end short reads - sample_info = ["0", fastq_1, fastq_2] - elif sample and fastq_1 and not fastq_2: ## Single-end short reads - sample_info = ["1", fastq_1, fastq_2] - else: - print_error("Invalid combination of columns provided!", "Line", line) - - ## Create sample mapping dictionary = { sample: [ single_end, fastq_1, fastq_2 ] } - if sample not in sample_mapping_dict: - sample_mapping_dict[sample] = [sample_info] - else: - if sample_info in sample_mapping_dict[sample]: - print_error("Samplesheet contains duplicate rows!", "Line", line) - else: - sample_mapping_dict[sample].append(sample_info) - - ## Write validated samplesheet with appropriate columns - if len(sample_mapping_dict) > 0: - out_dir = os.path.dirname(file_out) - make_dir(out_dir) - with open(file_out, "w") as fout: - fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n") - for sample in sorted(sample_mapping_dict.keys()): - - ## Check that multiple runs of the same sample are of the same datatype - if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): - print_error("Multiple runs of a sample must be of the same datatype!", "Sample: {}".format(sample)) - - for idx, val in enumerate(sample_mapping_dict[sample]): - fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n") - else: - print_error("No entries to process!", "Samplesheet: {}".format(file_in)) - - -def main(args=None): - args = parse_args(args) - check_samplesheet(args.FILE_IN, args.FILE_OUT) + Example: + This function checks that the samplesheet follows the following structure, + see also the `viral recon samplesheet`_:: + + sample,fastq_1,fastq_2 + SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz + SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz + SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz, + + .. _viral recon samplesheet: + https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv + + """ + required_columns = {"sample", "fastq_1", "fastq_2"} + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + with file_in.open(newline="") as in_handle: + reader = csv.DictReader(in_handle, dialect=sniff_format(in_handle)) + # Validate the existence of the expected header columns. 
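+        # Note: `issubset` only checks that the three required columns are present;
+        # any additional columns in the sheet are tolerated and carried through to
+        # the validated output unchanged.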
+ if not required_columns.issubset(reader.fieldnames): + logger.critical(f"The sample sheet **must** contain the column headers: {', '.join(required_columns)}.") + sys.exit(1) + # Validate each row. + checker = RowChecker() + for i, row in enumerate(reader): + try: + checker.validate_and_transform(row) + except AssertionError as error: + logger.critical(f"{str(error)} On line {i + 2}.") + sys.exit(1) + checker.validate_unique_samples() + header = list(reader.fieldnames) + header.insert(1, "single_end") + # See https://docs.python.org/3.9/library/csv.html#id3 to read up on `newline=""`. + with file_out.open(mode="w", newline="") as out_handle: + writer = csv.DictWriter(out_handle, header, delimiter=",") + writer.writeheader() + for row in checker.modified: + writer.writerow(row) + + +def parse_args(argv=None): + """Define and immediately parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Validate and transform a tabular samplesheet.", + epilog="Example: python check_samplesheet.py samplesheet.csv samplesheet.valid.csv", + ) + parser.add_argument( + "file_in", + metavar="FILE_IN", + type=Path, + help="Tabular input samplesheet in CSV or TSV format.", + ) + parser.add_argument( + "file_out", + metavar="FILE_OUT", + type=Path, + help="Transformed output samplesheet in CSV format.", + ) + parser.add_argument( + "-l", + "--log-level", + help="The desired log level (default WARNING).", + choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"), + default="WARNING", + ) + return parser.parse_args(argv) + + +def main(argv=None): + """Coordinate argument parsing and program execution.""" + args = parse_args(argv) + logging.basicConfig(level=args.log_level, format="[%(levelname)s] %(message)s") + if not args.file_in.is_file(): + logger.error(f"The given input file {args.file_in} was not found!") + sys.exit(2) + args.file_out.parent.mkdir(parents=True, exist_ok=True) + check_samplesheet(args.file_in, args.file_out) if __name__ == "__main__": diff --git a/conf/base.config b/conf/base.config index 7d647342..7d085b25 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/viralrecon Nextflow base config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ A 'blank slate' config file, appropriate for general use on most high performance compute environments. Assumes that all software is installed and available on the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. diff --git a/conf/igenomes.config b/conf/igenomes.config index 855948de..7a1b3ac6 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Nextflow config file for iGenomes paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Defines reference genomes using iGenome paths. 
Can be used by any config that customises the base path using: $params.igenomes_base / --igenomes_base @@ -13,7 +13,7 @@ params { genomes { 'GRCh37' { fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" @@ -26,7 +26,7 @@ params { } 'GRCh38' { fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" @@ -38,7 +38,7 @@ params { } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" @@ -51,7 +51,7 @@ params { } 'TAIR10' { fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" @@ -62,7 +62,7 @@ params { } 'EB2' { fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" @@ -72,7 +72,7 @@ params { } 'UMD3.1' { fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" bismark = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" @@ -83,7 +83,7 @@ params { } 'WBcel235' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" @@ -94,7 +94,7 @@ params { } 'CanFam3.1' { fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" @@ -105,7 +105,7 @@ params { } 'GRCz10' { fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" @@ -115,7 +115,7 @@ params { } 'BDGP6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" @@ -126,7 +126,7 @@ params { } 'EquCab2' { fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" @@ -137,7 +137,7 @@ params { } 'EB1' { fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" bowtie2 = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" @@ -147,7 +147,7 @@ params { } 'Galgal4' { fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" @@ -157,7 +157,7 @@ params { } 'Gm01' { fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" @@ -167,7 +167,7 @@ params { } 'Mmul_1' { fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" @@ -178,7 +178,7 @@ params { } 'IRGSP-1.0' { fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" @@ -188,7 +188,7 @@ params { } 'CHIMP2.1.4' { fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" @@ -199,7 +199,7 @@ params { } 'Rnor_5.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/genome.fa" + bwa 
= "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" @@ -209,7 +209,7 @@ params { } 'Rnor_6.0' { fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" @@ -219,7 +219,7 @@ params { } 'R64-1-1' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" @@ -230,7 +230,7 @@ params { } 'EF2' { fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" @@ -242,7 +242,7 @@ params { } 'Sbi1' { fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" @@ -252,7 +252,7 @@ params { } 'Sscrofa10.2' { fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" @@ -263,7 +263,7 @@ params { } 'AGPv3' { fasta = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" @@ -273,7 +273,7 @@ params { } 'hg38' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" @@ -285,7 +285,7 @@ params { } 'hg19' { fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" @@ -298,7 +298,7 @@ params { } 'mm10' { fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" @@ -311,7 +311,7 @@ params { } 'bosTau8' { fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" @@ -321,7 +321,7 @@ params { } 'ce10' { fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" @@ -333,7 +333,7 @@ params { } 'canFam3' { fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" + bwa = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" @@ -344,7 +344,7 @@ params { } 'danRer10' { fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" @@ -355,7 +355,7 @@ params { } 'dm6' { fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" @@ -366,7 +366,7 @@ params { } 'equCab2' { fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" @@ -377,7 +377,7 @@ params { } 'galGal4' { fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" @@ -388,7 +388,7 @@ params { } 'panTro4' { fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" @@ -399,7 +399,7 @@ params { } 'rn6' { fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" + bwa = 
"${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" @@ -409,7 +409,7 @@ params { } 'sacCer3' { fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" @@ -419,7 +419,7 @@ params { } 'susScr3' { fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" diff --git a/conf/modules.config b/conf/modules.config index a0506a4d..da58a5d8 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -1,12 +1,12 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Config file for defining DSL2 per module options and publishing paths -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Available keys to override module options: - ext.args = Additional arguments appended to command in module. - ext.args2 = Second set of arguments appended to command in module (multi-tool modules). - ext.args3 = Third set of arguments appended to command in module (multi-tool modules). - ext.prefix = File name prefix for output files. + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. ---------------------------------------------------------------------------------------- */ @@ -14,14 +14,14 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: 'copy', + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] withName: SAMPLESHEET_CHECK { publishDir = [ path: { "${params.outdir}/pipeline_info" }, - mode: 'copy', + mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }
        ]
    }

@@ -33,7 +33,7 @@ process {
    withName: CUSTOM_DUMPSOFTWAREVERSIONS {
        publishDir = [
            path: { "${params.outdir}/pipeline_info" },
-            mode: 'copy',
+            mode: params.publish_dir_mode,
            pattern: '*_versions.yml'
        ]
    }
}
diff --git a/conf/test.config b/conf/test.config
index 855f960c..28d644be 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -1,11 +1,11 @@
 /*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Nextflow config file for running minimal tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Defines input files and everything required to run a fast and simple pipeline test.

     Use as follows:
-        nextflow run nf-core/viralrecon -profile test,<docker/singularity>
+        nextflow run nf-core/viralrecon -profile test,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/
diff --git a/conf/test_full.config b/conf/test_full.config
index dc5754ac..65255d65 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -1,11 +1,11 @@
 /*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Nextflow config file for running full-size tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
     Defines input files and everything required to run a full size pipeline test.

     Use as follows:
-        nextflow run nf-core/viralrecon -profile test_full,<docker/singularity>
+        nextflow run nf-core/viralrecon -profile test_full,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/
diff --git a/docs/usage.md b/docs/usage.md
index 2752a6dc..977d8123 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -57,7 +57,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 The typical command for running the pipeline is as follows:

 ```console
-nextflow run nf-core/viralrecon --input samplesheet.csv --genome GRCh37 -profile docker
+nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
 ```

 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.
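For reference, the `samplesheet.csv` supplied via `--input` might contain a mix of paired-end and single-end entries, following the layout validated by `bin/check_samplesheet.py` above:

```console
sample,fastq_1,fastq_2
SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz
SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,
```

Paired-end rows populate all three columns, while single-end rows leave `fastq_2` empty.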
@@ -141,11 +141,11 @@ Whilst the default requirements set within the pipeline will hopefully work for For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: ```console -[62/149eb0] NOTE: Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) -Error executing process > 'RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' +[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) +Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' Caused by: - Process `RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) + Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) Command executed: STAR \ @@ -169,17 +169,24 @@ Work dir: Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` -To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so based on the search results the file we want is `modules/nf-core/software/star/align/main.nf`. If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. Providing you haven't set any other standard nf-core parameters to __cap__ the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. +To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. 
+If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9).
+The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure a subset of processes having similar computing requirements.
+The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB.
+Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB.
+The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections.

```nextflow
process {
-    withName: STAR_ALIGN {
+    withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' {
         memory = 100.GB
     }
 }
```

-> **NB:** We specify just the process name i.e. `STAR_ALIGN` in the config file and not the full task name string that is printed to screen in the error message or on the terminal whilst the pipeline is running i.e. `RNASEQ:ALIGN_STAR:STAR_ALIGN`. You may get a warning suggesting that the process selector isn't recognised but you can ignore that if the process name has been specified correctly. This is something that needs to be fixed upstream in core Nextflow.
+> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden.
+> If you get a warning suggesting that the process selector isn't recognised, check that the process name has been specified correctly.
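Saved to a file, the snippet above can then be supplied at launch time. A minimal sketch, assuming it was saved as `custom_resources.config` (a hypothetical filename) and the original run used the `docker` profile:

```console
nextflow run nf-core/rnaseq -profile docker --input samplesheet.csv --outdir <OUTDIR> -c custom_resources.config -resume
```

With `-resume`, tasks that already completed are restored from the cache, so only the failed `STAR_ALIGN` task and anything downstream of it is re-executed.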
### Updating containers diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy index 40ab65f2..b3d092f8 100755 --- a/lib/NfcoreSchema.groovy +++ b/lib/NfcoreSchema.groovy @@ -27,7 +27,7 @@ class NfcoreSchema { /* groovylint-disable-next-line UnusedPrivateMethodParameter */ public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { def has_error = false - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Check for nextflow core params and unexpected params def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') @@ -135,7 +135,7 @@ class NfcoreSchema { } } - //=====================================================================// + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// // Validate parameters against the schema InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) diff --git a/lib/Utils.groovy b/lib/Utils.groovy index 1b88aec0..28567bd7 100755 --- a/lib/Utils.groovy +++ b/lib/Utils.groovy @@ -29,12 +29,12 @@ class Utils { conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults')) if (conda_check_failed) { - log.warn "=============================================================================\n" + + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " There is a problem with your Conda configuration!\n\n" + " You will need to set-up the conda-forge and bioconda channels correctly.\n" + " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" + " NB: The order of the channels matters!\n" + - "===================================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" } } } diff --git a/lib/WorkflowViralrecon.groovy b/lib/WorkflowViralrecon.groovy index 0e42e360..9e22bb22 100755 --- a/lib/WorkflowViralrecon.groovy +++ b/lib/WorkflowViralrecon.groovy @@ -48,11 +48,11 @@ class WorkflowViralrecon { // private static void genomeExistsError(params, log) { if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - log.error "=============================================================================\n" + + log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + " Currently, the available genome keys are:\n" + " ${params.genomes.keySet().join(", ")}\n" + - "===================================================================================" + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" System.exit(1) } } diff --git a/main.nf b/main.nf index 52c341dd..4fadbfa1 100644 --- a/main.nf +++ b/main.nf @@ -1,8 +1,8 @@ #!/usr/bin/env nextflow /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/viralrecon -======================================================================================== 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Github : https://github.com/nf-core/viralrecon Website: https://nf-co.re/viralrecon Slack : https://nfcore.slack.com/channels/viralrecon @@ -12,25 +12,25 @@ nextflow.enable.dsl = 2 /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GENOME PARAMETER VALUES -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE & PRINT PARAMETER SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ WorkflowMain.initialise(workflow, params, log) /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NAMED WORKFLOW FOR PIPELINE -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { VIRALRECON } from './workflows/viralrecon' @@ -43,9 +43,9 @@ workflow NFCORE_VIRALRECON { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN ALL WORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -57,7 +57,7 @@ workflow { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ diff --git a/nextflow.config b/nextflow.config index 5f8d00a8..06b06c45 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nf-core/viralrecon Nextflow config file -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Default config options for all compute environments ---------------------------------------------------------------------------------------- */ @@ -24,8 +24,9 @@ params { max_multiqc_email_size = '25.MB' // Boilerplate options - outdir = './results' + outdir = null tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' email = null email_on_fail = null plaintext_email = false @@ -62,6 +63,15 @@ try { System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// 
Load nf-core/viralrecon custom profiles from different institutions.
+// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
+// try {
+//   includeConfig "${params.custom_config_base}/pipeline/viralrecon.config"
+// } catch (Exception e) {
+//   System.err.println("WARNING: Could not load nf-core/config/viralrecon profiles: ${params.custom_config_base}/pipeline/viralrecon.config")
+// }
+
+
 profiles {
     debug { process.beforeScript = 'echo $HOSTNAME' }
     conda {
@@ -121,7 +131,7 @@ if (!params.igenomes_ignore) {
 }

 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
-// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. 
+// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.

 env {
@@ -159,7 +169,7 @@ manifest {
     description = 'Assembly and intrahost/low-frequency variant calling for viral samples'
     mainScript = 'main.nf'
     nextflowVersion = '!>=21.10.3'
-    version = '2.3dev'
+    version = '2.5dev'
 }

 // Load modules.config for DSL2 module specific options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index b16be3c3..5bdf5c85 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -11,7 +11,8 @@
         "fa_icon": "fas fa-terminal",
         "description": "Define where the pipeline should find input data and save output data.",
         "required": [
-            "input"
+            "input",
+            "outdir"
         ],
         "properties": {
             "input": {
@@ -26,8 +27,8 @@
             },
             "outdir": {
                 "type": "string",
-                "description": "Path to the output directory where the results will be saved.",
-                "default": "./results",
+                "format": "directory-path",
+                "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
                 "fa_icon": "fas fa-folder-open"
             },
             "email": {
@@ -178,6 +179,22 @@
                 "fa_icon": "fas fa-question-circle",
                 "hidden": true
             },
+            "publish_dir_mode": {
+                "type": "string",
+                "default": "copy",
+                "description": "Method used to save pipeline results to output directory.",
+                "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", + "fa_icon": "fas fa-copy", + "enum": [ + "symlink", + "rellink", + "link", + "copy", + "copyNoFollow", + "move" + ], + "hidden": true + }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index cddcbb3c..0aecf87f 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -12,7 +12,7 @@ workflow INPUT_CHECK { SAMPLESHEET_CHECK ( samplesheet ) .csv .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } + .map { create_fastq_channel(it) } .set { reads } emit: @@ -21,22 +21,24 @@ workflow INPUT_CHECK { } // Function to get list of [ meta, [ fastq_1, fastq_2 ] ] -def create_fastq_channels(LinkedHashMap row) { +def create_fastq_channel(LinkedHashMap row) { + // create meta map def meta = [:] - meta.id = row.sample - meta.single_end = row.single_end.toBoolean() + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() - def array = [] + // add path(s) of the fastq file(s) to the meta map + def fastq_meta = [] if (!file(row.fastq_1).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" } if (meta.single_end) { - array = [ meta, [ file(row.fastq_1) ] ] + fastq_meta = [ meta, [ file(row.fastq_1) ] ] } else { if (!file(row.fastq_2).exists()) { exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" } - array = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] } - return array + return fastq_meta } diff --git a/workflows/viralrecon.nf b/workflows/viralrecon.nf index d730a813..3f12538a 100644 --- a/workflows/viralrecon.nf +++ b/workflows/viralrecon.nf @@ -1,7 +1,7 @@ /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ VALIDATE INPUTS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) @@ -18,18 +18,18 @@ for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config) : Channel.empty() /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -38,9 +38,9 @@ ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multi include { INPUT_CHECK } from '../subworkflows/local/input_check' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT NF-CORE MODULES/SUBWORKFLOWS -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // @@ -51,9 +51,9 @@ include { MULTIQC } from '../modules/nf-core/modules/multiqc include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // Info required for completion email and summary @@ -104,9 +104,9 @@ workflow VIRALRECON { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ COMPLETION EMAIL AND SUMMARY -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ workflow.onComplete { @@ -117,7 +117,7 @@ workflow.onComplete { } /* -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END -======================================================================================== +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ From edd8bb9ff07cab4841acf530c651402a22b9b16a Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Wed, 23 Mar 2022 13:57:06 +0000 Subject: [PATCH 03/58] Template update for nf-core/tools version 2.3.1 --- .editorconfig | 5 +- .github/CONTRIBUTING.md | 15 +- .github/ISSUE_TEMPLATE/bug_report.yml | 1 - .github/PULL_REQUEST_TEMPLATE.md | 6 +- .github/workflows/awsfulltest.yml | 1 - .github/workflows/awstest.yml | 2 +- .github/workflows/branch.yml | 5 +- .github/workflows/ci.yml | 12 +- .github/workflows/linting.yml | 85 ++--------- .github/workflows/linting_comment.yml | 3 +- .gitpod.yml | 16 +- .markdownlint.yml | 14 -- .prettierrc.yml | 1 + .yamllint.yml | 6 - CHANGELOG.md | 2 + CITATIONS.md | 27 ++-- README.md | 31 ++-- assets/email_template.html | 142 ++++++++++++------ assets/multiqc_config.yaml | 11 -- assets/multiqc_config.yml | 11 ++ assets/schema_input.json | 5 +- docs/README.md | 8 +- docs/output.md | 28 ++-- docs/usage.md | 109 +++++++------- modules.json | 8 +- .../custom/dumpsoftwareversions/main.nf | 3 
+ .../custom/dumpsoftwareversions/meta.yml | 2 +- modules/nf-core/modules/fastqc/main.nf | 3 + modules/nf-core/modules/fastqc/meta.yml | 90 +++++------ modules/nf-core/modules/multiqc/main.nf | 9 +- modules/nf-core/modules/multiqc/meta.yml | 66 ++++---- nextflow_schema.json | 14 +- workflows/viralrecon.nf | 2 +- 33 files changed, 361 insertions(+), 382 deletions(-) delete mode 100644 .markdownlint.yml create mode 100644 .prettierrc.yml delete mode 100644 .yamllint.yml delete mode 100644 assets/multiqc_config.yaml create mode 100644 assets/multiqc_config.yml diff --git a/.editorconfig b/.editorconfig index 95549501..b6b31907 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,12 +8,9 @@ trim_trailing_whitespace = true indent_size = 4 indent_style = space -[*.{yml,yaml}] +[*.{md,yml,yaml,html,css,scss,js}] indent_size = 2 -[*.json] -insert_final_newline = unset - # These files are edited and tested upstream in nf-core/modules [/modules/nf-core/**] charset = unset diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index b4bff9b6..17e6615b 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,8 +15,7 @@ Contributions to the code are even more welcome ;) If you'd like to write some code for nf-core/viralrecon, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/viralrecon issues](https://github.com/nf-core/viralrecon/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. Check that there isn't already an issue about your idea in the [nf-core/viralrecon issues](https://github.com/nf-core/viralrecon/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/viralrecon repository](https://github.com/nf-core/viralrecon) to your GitHub account 3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) 4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). @@ -49,9 +48,9 @@ These tests are run both with the latest available version of `Nextflow` and als :warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. ## Getting help @@ -73,7 +72,7 @@ If you wish to contribute a new step, please use the following coding standards: 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. 8. If applicable, add a new test command in `.github/workflow/ci.yml`. -9. Update MultiQC config `assets/multiqc_config.yaml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://https://multiqc.info/) module. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. 
If applicable, add a [MultiQC](https://multiqc.info/) module.
10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`.

### Default values
@@ -92,8 +91,8 @@ The process resources can be passed on to the tool dynamically within the proces

 Please use the following naming schemes, to make it easy to understand what is going where.

-* initial process channel: `ch_output_from_<process>`
-* intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>`
+- initial process channel: `ch_output_from_<process>`
+- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>`

### Nextflow version bumping
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml
index b5338dc4..c8de4b1e 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -2,7 +2,6 @@ name: Bug report
 description: Report something that is broken or incorrect
 labels: bug
 body:
-
   - type: markdown
     attributes:
       value: |
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 063def28..7db186f5 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -16,10 +16,10 @@ Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/vira

 - [ ] This comment contains a description of changes (with reason).
 - [ ] If you've fixed a bug or added code that should be tested, add tests!
-  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md)
-  - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
+  - [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md)
+  - [ ] If necessary, also make a PR on the nf-core/viralrecon _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository.
 - [ ] Make sure your code lints (`nf-core lint`).
-- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker` --outdir <OUTDIR>`).
+- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir <OUTDIR>`).
 - [ ] Usage Documentation in `docs/usage.md` is updated.
 - [ ] Output Documentation in `docs/output.md` is updated.
 - [ ] `CHANGELOG.md` is updated.
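As an illustration of the channel-naming scheme described in the contribution guidelines above, a minimal Nextflow sketch (the process and channel names are hypothetical):

```nextflow
FASTQC ( ch_reads )
ch_output_from_fastqc = FASTQC.out.zip        // initial process channel
ch_fastqc_for_multiqc = ch_output_from_fastqc // intermediate channel feeding the next process
MULTIQC ( ch_fastqc_for_multiqc.collect() )   // terminal consumer of the channel
```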
diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 42a8c39b..7c748574 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -18,7 +18,6 @@ jobs: # TODO nf-core: You can customise AWS full pipeline tests as required # Add full size test data (but still relatively small datasets for few samples) # on the `test_full.config` test runs with only one set of parameters - with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index dc710981..d8f5a6ff 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -10,9 +10,9 @@ jobs: if: github.repository == 'nf-core/viralrecon' runs-on: ubuntu-latest steps: + # Launch workflow using Tower CLI tool action - name: Launch workflow via tower uses: nf-core/tower-action@v3 - with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index 1203ec66..3d59d488 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -13,8 +13,7 @@ jobs: - name: Check PRs if: github.repository == 'nf-core/viralrecon' run: | - { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] - + "{ [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]]" # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets @@ -43,4 +42,4 @@ jobs: Thanks again for your contribution! repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false - +# diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b012729..f5c5f501 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,18 +16,18 @@ jobs: test: name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }} + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}" runs-on: ubuntu-latest strategy: matrix: # Nextflow versions include: # Test pipeline minimum Nextflow version - - NXF_VER: '21.10.3' - NXF_EDGE: '' + - NXF_VER: "21.10.3" + NXF_EDGE: "" # Test latest edge release of Nextflow - - NXF_VER: '' - NXF_EDGE: '1' + - NXF_VER: "" + NXF_EDGE: "1" steps: - name: Check out pipeline code uses: actions/checkout@v2 @@ -48,3 +48,5 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + +# diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index fda934c0..e9cf5de3 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,7 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
on: push: pull_request: @@ -8,42 +9,6 @@ on: types: [published] jobs: - Markdown: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-node@v2 - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint . - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## Markdown linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `markdownlint-cli` - * On Mac: `brew install markdownlint-cli` - * Everything else: [Install `npm`](https://www.npmjs.com/get-npm) then [install `markdownlint-cli`](https://www.npmjs.com/package/markdownlint-cli) (`npm install -g markdownlint-cli`) - * Fix the markdown errors - * Automatically: `markdownlint . --fix` - * Manually resolve anything left from `markdownlint .` - - Once you push these changes the test should pass, and you can hide this comment :+1: - - We highly recommend setting up markdownlint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! - - Thanks again for your contribution! - repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false - EditorConfig: runs-on: ubuntu-latest steps: @@ -55,49 +20,24 @@ jobs: run: npm install -g editorconfig-checker - name: Run ECLint check - run: editorconfig-checker -exclude README.md $(git ls-files | grep -v test) + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') - YAML: + Prettier: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@master - - name: 'Yamllint' - uses: karancode/yamllint-github-action@master - with: - yamllint_file_or_dir: '.' - yamllint_config_filepath: '.yamllint.yml' - - # If the above check failed, post a comment on the PR explaining the failure - - name: Post PR comment - if: failure() - uses: mshick/add-pr-comment@v1 - with: - message: | - ## YAML linting is failing - - To keep the code consistent with lots of contributors, we run automated code consistency checks. - To fix this CI test, please run: - - * Install `yamllint` - * Install `yamllint` following [this](https://yamllint.readthedocs.io/en/stable/quickstart.html#installing-yamllint) - instructions or alternative install it in your [conda environment](https://anaconda.org/conda-forge/yamllint) - * Fix the markdown errors - * Run the test locally: `yamllint $(find . -type f -name "*.yml" -o -name "*.yaml") -c ./.yamllint.yml` - * Fix any reported errors in your YAML files + - uses: actions/checkout@v2 - Once you push these changes the test should pass, and you can hide this comment :+1: + - uses: actions/setup-node@v2 - We highly recommend setting up yaml-lint in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + - name: Install Prettier + run: npm install -g prettier - Thanks again for your contribution! 
- repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code uses: actions/checkout@v2 @@ -110,8 +50,8 @@ jobs: - uses: actions/setup-python@v1 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.6" + architecture: "x64" - name: Install dependencies run: | @@ -139,3 +79,4 @@ jobs: lint_results.md PR_number.txt +# diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 44d72994..91c487a1 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -1,4 +1,3 @@ - name: nf-core linting comment # This workflow is triggered after the linting action is complete # It posts an automated comment to the PR, even if the PR is coming from a fork @@ -27,4 +26,4 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md - +# diff --git a/.gitpod.yml b/.gitpod.yml index b7d4cee1..c452ee93 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -2,13 +2,13 @@ image: nfcore/gitpod:latest vscode: extensions: # based on nf-core.nf-core-extensionpack - - codezombiech.gitignore # Language support for .gitignore files + - codezombiech.gitignore # Language support for .gitignore files # - cssho.vscode-svgviewer # SVG viewer - - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code - - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar - - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors # - nextflow.nextflow # Nextflow syntax highlighting - - oderwat.indent-rainbow # Highlight indentation level - - streetsidesoftware.code-spell-checker # Spelling checker for source code + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.markdownlint.yml b/.markdownlint.yml deleted file mode 100644 index 9e605fcf..00000000 --- a/.markdownlint.yml +++ /dev/null @@ -1,14 +0,0 @@ -# Markdownlint configuration file -default: true -line-length: false -ul-indent: - indent: 4 -no-duplicate-header: - siblings_only: true -no-inline-html: - allowed_elements: - - img - - p - - kbd - - details - - summary diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 00000000..c81f9a76 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/.yamllint.yml b/.yamllint.yml deleted file mode 100644 index d466deec..00000000 --- a/.yamllint.yml +++ /dev/null @@ -1,6 +0,0 @@ -extends: default - -rules: - document-start: disable - line-length: disable - truthy: disable diff --git a/CHANGELOG.md b/CHANGELOG.md index 400db1d6..734d5ef2 
100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,8 @@ Initial release of nf-core/viralrecon, created with the [nf-core](https://nf-co.

 ### `Fixed`

+- Clarified conda usage and added an installation tutorial for Singularity since the one on Sylabs' website uses an outdated version of the Go compiler
+
 ### `Dependencies`

 ### `Deprecated`
diff --git a/CITATIONS.md b/CITATIONS.md
index 2e969e0f..80e96f9d 100644
--- a/CITATIONS.md
+++ b/CITATIONS.md
@@ -10,23 +10,26 @@

 ## Pipeline tools

-* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)
+- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)

-* [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
-  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.
+- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/)
+  > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924.

 ## Software packaging/containerisation tools

-* [Anaconda](https://anaconda.com)
-  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.
+- [Anaconda](https://anaconda.com)

-* [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)
-  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.
+  > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web.

-* [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
-  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671.
+- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/)

-* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241)
+  > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506.

-* [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/)
-  > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675.
+- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/)
+
+  > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. 
BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. diff --git a/README.md b/README.md index 93e38252..fbb6b91e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo_light.png#gh-light-mode-only) ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo_dark.png#gh-dark-mode-only) +# ![nf-core/viralrecon](docs/images/nf-core/viralrecon_logo_light.png#gh-light-mode-only) ![nf-core/viralrecon](docs/images/nf-core/viralrecon_logo_dark.png#gh-dark-mode-only) [![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+linting%22) @@ -17,11 +17,13 @@ ## Introduction + **nf-core/viralrecon** is a bioinformatics best-practice analysis pipeline for Assembly and intrahost/low-frequency variant calling for viral samples. The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from the full-sized test can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). ## Pipeline summary @@ -35,28 +37,28 @@ On release, automated continuous integration tests run the pipeline on a full-si 1. Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) -2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +2. 
Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_.

3. Download the pipeline and test it on a minimal dataset with a single command:

-   ```console
-   nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir <OUTDIR>
-   ```
+   ```console
+   nextflow run nf-core/viralrecon -profile test,YOURPROFILE --outdir <OUTDIR>
+   ```

-   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.
+   Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string.

-   > * The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
-   > * Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
-   > * If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
-   > * If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.
+   > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`.
+   > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment.
+ > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs.
+ > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs.

4. Start running your own analysis!

-
+

-   ```console
-   nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-   ```
+   ```console
+   nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+   ```

## Documentation

@@ -82,6 +84,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `#
+
 An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.

 You can cite the `nf-core` publication as follows:
diff --git a/assets/email_template.html b/assets/email_template.html
index 523e9a8f..f08c71a8 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -1,53 +1,111 @@
[assets/email_template.html hunk body: the HTML markup was stripped during extraction and cannot be reconstructed verbatim. The surviving fragments show a Prettier-style reflow of the e-mail template with its visible content unchanged: the nf-core/viralrecon v${version} heading, "Run Name: $runName", the success/failure blocks (exit status $exitStatus and ${errorReport}), the completion time ($dateComplete, $duration), the launch $commandLine, the "Pipeline Configuration" summary table ($k/$v rows), and the closing links to https://github.com/nf-core/viralrecon.]
+ diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index a9c1f583..00000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,11 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/viralrecon - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - software_versions: - order: -1000 - nf-core-viralrecon-summary: - order: -1001 - -export_plots: true diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 00000000..ace61f7b --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,11 @@ +report_comment: > + This report has been generated by the nf-core/viralrecon + analysis pipeline. For information about how to interpret these results, please see the + documentation. +report_section_order: + software_versions: + order: -1000 + "nf-core-viralrecon-summary": + order: -1001 + +export_plots: true diff --git a/assets/schema_input.json b/assets/schema_input.json index 08814a76..581e2709 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -31,9 +31,6 @@ ] } }, - "required": [ - "sample", - "fastq_1" - ] + "required": ["sample", "fastq_1"] } } diff --git a/docs/README.md b/docs/README.md index 0e457111..ceaf7450 100644 --- a/docs/README.md +++ b/docs/README.md @@ -2,9 +2,9 @@ The nf-core/viralrecon documentation is split into the following pages: -* [Usage](usage.md) - * An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. -* [Output](output.md) - * An overview of the different results produced by the pipeline and how to interpret them. +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/output.md b/docs/output.md index 072ec8ca..9ddc3653 100644 --- a/docs/output.md +++ b/docs/output.md @@ -12,18 +12,18 @@ The directories listed below will be created in the results directory after the The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: -* [FastQC](#fastqc) - Raw read QC -* [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -* [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +- [FastQC](#fastqc) - Raw read QC +- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline +- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution ### FastQC
Output files -* `fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics. - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
@@ -42,10 +42,10 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d
Output files -* `multiqc/` - * `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: directory containing static images from the report in various formats. +- `multiqc/` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: directory containing static images from the report in various formats.
@@ -58,10 +58,10 @@ Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQ
Output files

-* `pipeline_info/`
-    * Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
-    * Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameter's are used when running the pipeline.
-    * Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
+- `pipeline_info/`
+  - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`.
+  - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline.
+  - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
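To illustrate the reformatted samplesheet mentioned above, a plausible sketch of `samplesheet.valid.csv`, assuming the samplesheet check step adds the `single_end` column that `create_fastq_channel` reads later in the workflow (the sample names and file names are hypothetical):

```console
$ head -3 <OUTDIR>/pipeline_info/samplesheet.valid.csv
sample,single_end,fastq_1,fastq_2
SAMPLE_PE,0,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz
SAMPLE_SE,1,AEG588A4_S4_L003_R1_001.fastq.gz,
```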
diff --git a/docs/usage.md b/docs/usage.md
index 977d8123..a22fe85d 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -44,11 +44,11 @@ TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz,
 TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz,
 ```

-| Column         | Description                                                                                                                                                                            |
-|----------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| `sample`       | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). |
-| `fastq_1`      | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
-| `fastq_2`      | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                             |
+| Column    | Description                                                                                                                                                                              |
+| --------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `sample`  | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`).  |
+| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                              |
+| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz".                                                              |

 An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline.

@@ -57,7 +57,7 @@ An [example samplesheet](../assets/samplesheet.csv) has been provided with the p
 The typical command for running the pipeline is as follows:

 ```console
-nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
+nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker
 ```

 This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles.

@@ -65,9 +65,9 @@ This will launch the pipeline with the `docker` configuration profile. See below
 Note that the pipeline will create the following files in your working directory:

 ```console
-work            # Directory containing the nextflow working files
-results         # Finished results (configurable, see below)
-.nextflow_log   # Log file from Nextflow
+work            # Directory containing the nextflow working files
+<OUTDIR>        # Finished results in specified location (defined with --outdir)
+.nextflow_log   # Log file from Nextflow
 # Other nextflow hidden files, eg. history of pipeline runs and old logs.
 ```

@@ -106,25 +106,25 @@ They are loaded in sequence, so later profiles can overwrite earlier profiles.

 If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
-* `docker` - * A generic configuration profile to be used with [Docker](https://docker.com/) -* `singularity` - * A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) -* `podman` - * A generic configuration profile to be used with [Podman](https://podman.io/) -* `shifter` - * A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) -* `charliecloud` - * A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) -* `conda` - * A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters ### `-resume` -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. @@ -186,6 +186,7 @@ process { ``` > **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> > If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. ### Updating containers @@ -196,35 +197,35 @@ The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementatio 2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) 3. 
Create the custom config accordingly: - * For Docker: - - ```nextflow - process { - withName: PANGOLIN { - container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - * For Singularity: - - ```nextflow - process { - withName: PANGOLIN { - container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' - } - } - ``` - - * For Conda: - - ```nextflow - process { - withName: PANGOLIN { - conda = 'bioconda::pangolin=3.0.5' - } - } - ``` + - For Docker: + + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` + + - For Singularity: + + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` + + - For Conda: + + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } + } + ``` > **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. diff --git a/modules.json b/modules.json index 26f60e0e..0ceb9749 100644 --- a/modules.json +++ b/modules.json @@ -4,14 +4,14 @@ "repos": { "nf-core/modules": { "custom/dumpsoftwareversions": { - "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "fastqc": { - "git_sha": "9d0cad583b9a71a6509b754fdf589cbfbed08961" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "multiqc": { - "git_sha": "20d8250d9f39ddb05dfb437603aaf99b5c0b2b41" + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" } } } -} \ No newline at end of file +} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 934bb467..327d5100 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -15,6 +15,9 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { path "software_versions_mqc.yml", emit: mqc_yml path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' template 'dumpsoftwareversions.py' diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml index 5b5b8a60..60b546a0 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml @@ -8,7 +8,7 @@ tools: description: Custom module used to dump software versions within the nf-core pipeline template homepage: https://github.com/nf-core/tools documentation: https://github.com/nf-core/tools - licence: ['MIT'] + licence: ["MIT"] input: - versions: type: file diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index d250eca0..ed6b8c50 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -15,6 +15,9 @@ process FASTQC { tuple val(meta), path("*.zip") , emit: zip path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' // Add soft-links to original FastQs for consistent naming in pipeline diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml index 
b09553a3..4da5bb5a 100644 --- a/modules/nf-core/modules/fastqc/meta.yml +++ b/modules/nf-core/modules/fastqc/meta.yml @@ -1,52 +1,52 @@ name: fastqc description: Run FastQC on sequenced reads keywords: - - quality control - - qc - - adapters - - fastq + - quality control + - qc + - adapters + - fastq tools: - - fastqc: - description: | - FastQC gives general quality metrics about your reads. - It provides information about the quality score distribution - across your reads, the per base sequence content (%A/C/G/T). - You get information about adapter contamination and other - overrepresented sequences. - homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ - documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ - licence: ['GPL-2.0-only'] + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" - - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@drpatelh" - - "@grst" - - "@ewels" - - "@FelixKrueger" + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/modules/multiqc/main.nf b/modules/nf-core/modules/multiqc/main.nf index 3dceb162..1264aac1 100644 --- a/modules/nf-core/modules/multiqc/main.nf +++ b/modules/nf-core/modules/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 'bioconda::multiqc=1.11' : null) + conda (params.enable_conda ? 'bioconda::multiqc=1.12' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: path multiqc_files @@ -15,6 +15,9 @@ process MULTIQC { path "*_plots" , optional:true, emit: plots path "versions.yml" , emit: versions + when: + task.ext.when == null || task.ext.when + script: def args = task.ext.args ?: '' """ diff --git a/modules/nf-core/modules/multiqc/meta.yml b/modules/nf-core/modules/multiqc/meta.yml index 63c75a45..6fa891ef 100644 --- a/modules/nf-core/modules/multiqc/meta.yml +++ b/modules/nf-core/modules/multiqc/meta.yml @@ -1,40 +1,40 @@ name: MultiQC description: Aggregate results from bioinformatics analyses across many samples into a single report keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report + - QC + - bioinformatics tools + - Beautiful stand-alone HTML report tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ['GPL-3.0-or-later'] + - multiqc: + description: | + MultiQC searches a given directory for analysis logs and compiles a HTML report. + It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. + homepage: https://multiqc.info/ + documentation: https://multiqc.info/docs/ + licence: ["GPL-3.0-or-later"] input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC output: - - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - type: dir - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - report: + type: file + description: MultiQC report file + pattern: "multiqc_report.html" + - data: + type: dir + description: MultiQC data dir + pattern: "multiqc_data" + - plots: + type: file + description: Plots created by MultiQC + pattern: "*_data" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" + - "@abhi18av" + - "@bunop" + - "@drpatelh" diff --git a/nextflow_schema.json b/nextflow_schema.json index 5bdf5c85..2fb362f5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,10 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "input", - "outdir" - ], + "required": ["input", "outdir"], "properties": { "input": { "type": "string", @@ -185,14 +182,7 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. 
See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "move" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, "email_on_fail": { diff --git a/workflows/viralrecon.nf b/workflows/viralrecon.nf index 3f12538a..c1394bec 100644 --- a/workflows/viralrecon.nf +++ b/workflows/viralrecon.nf @@ -23,7 +23,7 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yaml", checkIfExists: true) +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() /* From 54a322526e5ac89be94cad3e7ec81f20a7baed64 Mon Sep 17 00:00:00 2001 From: nf-core-bot Date: Thu, 24 Mar 2022 11:49:15 +0000 Subject: [PATCH 04/58] Template update for nf-core/tools version 2.3.2 --- .github/PULL_REQUEST_TEMPLATE.md | 1 - .github/workflows/awsfulltest.yml | 2 -- .github/workflows/awstest.yml | 2 -- .gitpod.yml | 2 +- CHANGELOG.md | 2 -- 5 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 7db186f5..3ff7d7ac 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -10,7 +10,6 @@ Remember that PRs should be made against the dev branch, unless you're preparing Learn more about contributing: [CONTRIBUTING.md](https://github.com/nf-core/viralrecon/tree/master/.github/CONTRIBUTING.md) --> - ## PR checklist diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 7c748574..4881dc1b 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -22,8 +22,6 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/viralrecon/work-${{ github.sha }} parameters: | { diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index d8f5a6ff..f9631c28 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -17,8 +17,6 @@ jobs: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - pipeline: ${{ github.repository }} - revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/viralrecon/work-${{ github.sha }} parameters: | { diff --git a/.gitpod.yml b/.gitpod.yml index c452ee93..85d95ecc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -4,7 +4,7 @@ vscode: extensions: # based on nf-core.nf-core-extensionpack - codezombiech.gitignore # Language support for .gitignore files # - cssho.vscode-svgviewer # SVG viewer - - davidanson.vscode-markdownlint # Markdown/CommonMark linting and style checking for Visual Studio Code + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files - Gruntfuggly.todo-tree 
# Display TODO and FIXME in a tree view in the activity bar diff --git a/CHANGELOG.md b/CHANGELOG.md index 734d5ef2..400db1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,8 +11,6 @@ Initial release of nf-core/viralrecon, created with the [nf-core](https://nf-co. ### `Fixed` -- Clarified conda usage and added an installation tutorial for Singularity since the one on Syllabs' website uses an outdate version of GO Compiler - ### `Dependencies` ### `Deprecated` From c7e62dd7db3fad93aa918c88cceff36fb0a71d6a Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 31 Mar 2022 18:01:29 +0100 Subject: [PATCH 05/58] Update CHANGELOG after merging template updates --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 56f5432d..a8d65c7c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2) + ### Parameters ## [[2.4.1](https://github.com/nf-core/viralrecon/releases/tag/2.4.1)] - 2022-03-01 From 5fa21911c890307b73531d1dda640fdae408cc56 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 31 Mar 2022 22:15:22 +0100 Subject: [PATCH 06/58] Fix template merge bug --- subworkflows/local/input_check.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf index 35f79aac..9d83d9f1 100644 --- a/subworkflows/local/input_check.nf +++ b/subworkflows/local/input_check.nf @@ -21,7 +21,7 @@ workflow INPUT_CHECK { .out .csv .splitCsv ( header:true, sep:',' ) - .map { create_fastq_channels(it) } + .map { create_fastq_channel(it) } .set { sample_info } } else if (platform == 'nanopore') { SAMPLESHEET_CHECK From bd19710be76c837513395fe46b8e1430683a4a8f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 1 Apr 2022 11:14:36 +0100 Subject: [PATCH 07/58] Remove TODO statement in README --- README.md | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8aa66dc3..dbc0a07d 100644 --- a/README.md +++ b/README.md @@ -99,11 +99,64 @@ A number of improvements were made to the pipeline recently, mainly with regard 4. Start running your own analysis! 
-
-   ```console
-   nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
-   ```
+   - Typical command for Illumina shotgun analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform illumina \
+         --protocol metagenomic \
+         --genome 'MN908947.3' \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - Typical command for Illumina amplicon analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform illumina \
+         --protocol amplicon \
+         --genome 'MN908947.3' \
+         --primer_set artic \
+         --primer_set_version 3 \
+         --skip_assembly \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - Typical command for Nanopore amplicon analysis:
+
+     ```bash
+     nextflow run nf-core/viralrecon \
+         --input samplesheet.csv \
+         --outdir <OUTDIR> \
+         --platform nanopore \
+         --genome 'MN908947.3' \
+         --primer_set_version 3 \
+         --fastq_dir fastq_pass/ \
+         --fast5_dir fast5_pass/ \
+         --sequencing_summary sequencing_summary.txt \
+         -profile <docker/singularity/podman/shifter/charliecloud/conda/institute>
+     ```
+
+   - An executable Python script called [`fastq_dir_to_samplesheet.py`](https://github.com/nf-core/viralrecon/blob/master/bin/fastq_dir_to_samplesheet.py) has been provided if you are using `--platform illumina` and would like to auto-create an input samplesheet based on a directory containing FastQ files **before** you run the pipeline (requires Python 3 installed locally; a rough sketch of the pairing logic follows this section) e.g.
+
+     ```console
+     wget -L https://raw.githubusercontent.com/nf-core/viralrecon/master/bin/fastq_dir_to_samplesheet.py
+     ./fastq_dir_to_samplesheet.py <FASTQ_DIR> samplesheet.csv
+     ```
+
+   - You can find the default keys used to specify `--genome` in the [genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config). This provides default options for
+
+     - Reference genomes (including SARS-CoV-2)
+     - Genome associated primer sets
+     - [Nextclade datasets](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html)
+
+     The Pangolin and Nextclade lineage and clade definitions change regularly as new SARS-CoV-2 lineages are discovered. For instructions to use more recent versions of lineage analysis tools like Pangolin and Nextclade please refer to the [updating containers](https://nf-co.re/viralrecon/usage#updating-containers) section in the usage docs.
+
+     Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys; see [usage docs](https://nf-co.re/viralrecon/usage#illumina-primer-sets).
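For reference, the pairing logic a helper like `fastq_dir_to_samplesheet.py` applies can be sketched in a few lines, assuming the default Illumina `_R1_`/`_R2_` read-pair tags; the directory and output names below are placeholders rather than script defaults:

```python
import csv
import glob
import os

FASTQ_DIR = "fastq/"            # placeholder input directory
OUTPUT_CSV = "samplesheet.csv"  # placeholder output file

# Group FastQ files by everything before the first read tag (_R1_/_R2_);
# each group becomes one samplesheet row.
pairs = {}
for path in sorted(glob.glob(os.path.join(FASTQ_DIR, "*.fastq.gz"))):
    name = os.path.basename(path)
    for tag in ("_R1_", "_R2_"):
        if tag in name:
            pairs.setdefault(name.split(tag)[0], {})[tag] = path

with open(OUTPUT_CSV, "w", newline="") as handle:
    writer = csv.writer(handle)
    writer.writerow(["sample", "fastq_1", "fastq_2"])
    for sample, reads in sorted(pairs.items()):
        writer.writerow([sample, reads.get("_R1_", ""), reads.get("_R2_", "")])
```

Prefer the shipped script for real runs; the sketch above ignores edge cases such as unconventional read naming and single-end layouts.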
## Documentation From 952d10af7763502f3e8be16b510a2527eee8efcd Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 1 Apr 2022 11:16:20 +0100 Subject: [PATCH 08/58] Remove retry logic from AWS actions --- .github/workflows/awsfulltest.yml | 3 --- .github/workflows/awstest.yml | 3 --- 2 files changed, 6 deletions(-) diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 3135180c..5600fcf9 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -29,6 +29,3 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}/platform_${{ matrix.platform }}" } profiles: test_full_${{ matrix.platform }},aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index f9631c28..a3ce4a0a 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -23,6 +23,3 @@ jobs: "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-test-${{ github.sha }}" } profiles: test,aws_tower - nextflow_config: | - process.errorStrategy = 'retry' - process.maxRetries = 3 From 3d5c5605d88d6349f0ea13932b61986d6d16adea Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 1 Apr 2022 11:21:19 +0100 Subject: [PATCH 09/58] Add .prettierignore file --- .prettierignore | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 .prettierignore diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..37a8f6aa --- /dev/null +++ b/.prettierignore @@ -0,0 +1,8 @@ +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc \ No newline at end of file From 408b31eb90683a1025245f4f46c36d2e00fbf402 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 1 Apr 2022 11:43:49 +0100 Subject: [PATCH 10/58] Fix ECLint --- .prettierignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.prettierignore b/.prettierignore index 37a8f6aa..5124c9ac 100644 --- a/.prettierignore +++ b/.prettierignore @@ -5,4 +5,4 @@ results/ .DS_Store testing/ testing* -*.pyc \ No newline at end of file +*.pyc From 3ef29370e018bedd25ff60bb7b8a9b7be549b44d Mon Sep 17 00:00:00 2001 From: saramonzon Date: Tue, 12 Apr 2022 16:14:36 +0200 Subject: [PATCH 11/58] start reformat --- bin/ivar_variants_to_vcf.py | 164 ++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 80 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index d3f126ea..7f652b7c 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -71,13 +71,13 @@ def check_consecutive(mylist): return False -def codon_position(seq1,seq2): +def get_diff_position(seq1,seq2): ''' Description: Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs. Input: - seq1 - list size 3 [A,T,C,G] - seq2 - list size 3 [A,T,C,G] + seq1 - string size 3 [A,T,C,G]. Ex. "ATC" + seq2 - string size 3 [A,T,C,G]. Ex. "ACC" Returns: Returns position where seq1 != seq2 ''' @@ -91,8 +91,75 @@ def codon_position(seq1,seq2): else: return ind_diff[0] +def merge_codons(lines_queue): + ## Always fill lines_queue until size 2. 
+    if len(lines_queue["POS"]) == 0 or len(lines_queue["POS"]) == 1:
+        for i,j in enumerate(lines_queue):
+            lines_queue.setdefault(j, []).append(param_list[i])
+        write_line=False
+
+    # If queue has size 2, we include the third line
+    elif len(lines_queue["POS"]) == 2:
+        for i,j in enumerate(lines_queue):
+            lines_queue.setdefault(j, []).append(param_list[i])
+        # Are two positions in the dict consecutive?
+        if check_consecutive(lines_queue["POS"]) == 2:
+            ## If the first position is not on the third position of the codon they are in the same codon.
+            if codon_position(lines_queue["REF_CODON"][0],lines_queue["ALT_CODON"][0]) != 2:
+                write_line = True
+                num_collapse = "2"
+                CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse)
+                oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n")
+                ## We remove the first two items in lines_queue, which have just been processed.
+                for i,j in enumerate(lines_queue):
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+            else:
+                write_line = True
+                oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n")
+                for i,j in enumerate(lines_queue):
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+
+        # Are the three positions in the dict consecutive?
+        elif check_consecutive(lines_queue["POS"]) == 3:
+            ## We check which codon position the first position falls in, to process it accordingly.
+            # If first position is in the first codon position all three positions belong to the same codon.
+            if codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 0:
+                write_line = True
+                num_collapse = 3
+                CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse)
+                oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n")
+                for i,j in enumerate(lines_queue):
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+                # we empty the lines_queue
+                lines_queue = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER':[], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]}
+            # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent.
+            elif codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 1:
+                write_line = True
+                num_collapse = 2
+                CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse)
+                oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n")
+                for i,j in enumerate(lines_queue):
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+                    lines_queue[list(lines_queue.keys())[i]].pop(0)
+            ## Finally, if the first position is in the last codon position, we write the first position and leave the remaining two to be evaluated in the next iteration.
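+            # A worked example of the three-consecutive cases above, with
+            # illustrative positions rather than real data: if POS holds
+            # ["100", "101", "102"] and the first variant sits at codon index 0,
+            # all three rows collapse into a single record (num_collapse = 3);
+            # if the first variant sits at codon index 2, position 100 closes its
+            # codon, so it is written alone and 101/102 wait for the next pass.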
+ elif codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 2: + write_line = True + oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n") + for i,j in enumerate(lines_queue): + lines_queue[list(lines_queue.keys())[i]].pop(0) + + elif check_consecutive(lines_queue["POS"]) == False: + write_line = True + oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n") + for i,j in enumerate(lines_queue): + lines_queue[list(lines_queue.keys())[i]].pop(0) + else: + print("Something went terribly wrong!!" + str(len(lines_queue["POS"]))) + -def rename_vars(dict_lines,num_collapse): +def get_lines_info(dict_lines,num_collapse): ''' Description: The function set the vars acordingly to the lines to collapse do to consecutive variants. @@ -105,11 +172,15 @@ def rename_vars(dict_lines,num_collapse): CHROM = dict_lines["CHROM"][0] POS = dict_lines["POS"][0] ID = dict_lines["ID"][0] - # If two consecutive collapse 2 lines into one. - if int(num_collapse) == 2: + # If no consecutive, process one line + if int(num_collapse) == 1: + REF = str(dict_lines["REF"][0]) + ALT = str(dict_lines["ALT"][0]) + # If two consecutive process two lines and write one. + elif int(num_collapse) == 2: REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1]) ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1]) - # If three consecutive collapse 3 lines into one. + # If three consecutive process three lines and write one elif int(num_collapse) == 3: REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1]) + str(dict_lines["REF"][2]) ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1]) + str(dict_lines["ALT"][2]) @@ -161,10 +232,6 @@ def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequenc Returns: None ''' - ## Create output directory - filename = os.path.splitext(file_in)[0] - out_dir = os.path.dirname(file_out) - make_dir(out_dir) ## Define VCF header header_source = [ @@ -275,75 +342,6 @@ def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequenc oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") else: - ## dict_lines contains all the informative fields for 3 positions in the vcf. - # dict_lines has a maximum size of three. - - ## Always fill dict_lines until size 2. - if len(dict_lines["POS"]) == 0 or len(dict_lines["POS"]) == 1: - for i,j in enumerate(dict_lines): - dict_lines.setdefault(j, []).append(param_list[i]) - write_line=False - - # If queue has size 2, we include the third line - elif len(dict_lines["POS"]) == 2: - for i,j in enumerate(dict_lines): - dict_lines.setdefault(j, []).append(param_list[i]) - # Are two positions in the dict consecutive? - if check_consecutive(dict_lines["POS"]) == 2: - ## If the first position is not on the third position of the codon they are in the same codon. 
- if codon_position(dict_lines["REF_CODON"][0],dict_lines["ALT_CODON"][0]) != 2: - write_line = True - num_collapse = "2" - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - ## We removed the first two items in dict_lines with have been just processed. - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - else: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - - # Are the three positions in the dict consecutive? - elif check_consecutive(dict_lines["POS"]) == 3: - ## we check the first position in which codon position is to process it acordingly. - # If first position is in the first codon position all three positions belong to the same codon. - if codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 0: - write_line = True - num_collapse = 3 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - # we empty the dict_lines - dict_lines = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER':[], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]} - # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. - elif codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 1: - write_line = True - num_collapse = 2 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - dict_lines[list(dict_lines.keys())[i]].pop(0) - ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. 
- elif codon_position(dict_lines["REF_CODON"][0], dict_lines["ALT_CODON"][0]) == 2: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - - elif check_consecutive(dict_lines["POS"]) == False: - write_line = True - oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n") - for i,j in enumerate(dict_lines): - dict_lines[list(dict_lines.keys())[i]].pop(0) - else: - print("Something went terribly wrong!!" + str(len(dict_lines["POS"]))) - ## Determine whether to output variant if pass_only and FILTER != "PASS": write_line = False @@ -386,6 +384,12 @@ def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequenc def main(args=None): args = parse_args(args) + + ## Create output directory + filename = os.path.splitext(file_in)[0] + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + ivar_variants_to_vcf( args.file_in, args.file_out, From 293664ee4971f0d4cfc28e7d1fc2e62d6dfc6b75 Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 00:49:53 +0200 Subject: [PATCH 12/58] draft reformat --- bin/ivar_variants_to_vcf.py | 443 +++++++++++++++++------------------- 1 file changed, 206 insertions(+), 237 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 7f652b7c..53dd8321 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -5,10 +5,12 @@ import re import errno import argparse +import queue +from collections import OrderedDict + import numpy as np from scipy.stats import fisher_exact - def parse_args(args=None): Description = "Convert iVar variants TSV file to VCF format." Epilog = """Example usage: python ivar_variants_to_vcf.py """ @@ -91,114 +93,65 @@ def get_diff_position(seq1,seq2): else: return ind_diff[0] -def merge_codons(lines_queue): - ## Always fill lines_queue until size 2. - if len(lines_queue["POS"]) == 0 or len(lines_queue["POS"]) == 1: - for i,j in enumerate(lines_queue): - lines_queue.setdefault(j, []).append(param_list[i]) - write_line=False - - # If queue has size 2, we include the third line - elif len(lines_queue["POS"]) == 2: - for i,j in enumerate(lines_queue): - lines_queue.setdefault(j, []).append(param_list[i]) - # Are two positions in the dict consecutive? - if check_consecutive(lines_queue["POS"]) == 2: - ## If the first position is not on the third position of the codon they are in the same codon. - if codon_position(lines_queue["REF_CODON"][0],lines_queue["ALT_CODON"][0]) != 2: - write_line = True - num_collapse = "2" - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - ## We removed the first two items in lines_queue with have been just processed. 
- for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - lines_queue[list(lines_queue.keys())[i]].pop(0) - else: - write_line = True - oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n") - for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - - # Are the three positions in the dict consecutive? - elif check_consecutive(lines_queue["POS"]) == 3: - ## we check the first position in which codon position is to process it acordingly. - # If first position is in the first codon position all three positions belong to the same codon. - if codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 0: - write_line = True - num_collapse = 3 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - lines_queue[list(lines_queue.keys())[i]].pop(0) - # we empty the lines_queue - lines_queue = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER':[], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]} - # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. - elif codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 1: - write_line = True - num_collapse = 2 - CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(lines_queue, num_collapse) - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - lines_queue[list(lines_queue.keys())[i]].pop(0) - ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. - elif codon_position(lines_queue["REF_CODON"][0], lines_queue["ALT_CODON"][0]) == 2: - write_line = True - oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n") - for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - - elif check_consecutive(lines_queue["POS"]) == False: - write_line = True - oline =(lines_queue["CHROM"][0] + "\t" + lines_queue["POS"][0] + "\t" + lines_queue["ID"][0] + "\t" + lines_queue["REF"][0] + "\t" + lines_queue["ALT"][0] + "\t" + lines_queue["QUAL"][0] + "\t" + lines_queue["FILTER"][0] + "\t" + lines_queue["INFO"][0] + "\t" + lines_queue["FORMAT"][0] + "\t" + lines_queue["SAMPLE"][0] + "\n") - for i,j in enumerate(lines_queue): - lines_queue[list(lines_queue.keys())[i]].pop(0) - else: - print("Something went terribly wrong!!" 
+ str(len(lines_queue["POS"]))) - - -def get_lines_info(dict_lines,num_collapse): +def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): + # Are two positions in the dict consecutive? + if check_consecutive(list(q_pos)) == 2: + ## If the first position is not on the third position of the codon they are in the same codon. + if codon_position(fe_codon_ref,fe_codon_alt) != 2: + num_collapse = 2 + else: + num_collapse = 1 + # Are the three positions in the dict consecutive? + elif check_consecutive(list(q_pos)) == 3: + ## we check the first position in which codon position is to process it acordingly. + # If first position is in the first codon position all three positions belong to the same codon. + if codon_position(fe_codon_ref,fe_codon_alt) == 0: + num_collapse = 3 + # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. + elif codon_position(fe_codon_ref,fe_codon_alt) == 1: + num_collapse = 2 + ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. + elif codon_position(fe_codon_ref,fe_codon_alt) == 2: + num_collapse = 1 + + elif check_consecutive(list(q_pos)) == False: + num_collapse = 1 + + return num_collapse + +def process_variants(variants,num_collapse): ''' Description: The function set the vars acordingly to the lines to collapse do to consecutive variants. Input: - dict_lines - Dict with var lines. + variants - Dict with var lines. num_collapse - number of lines to collapse [2,3] Returns:: Vars fixed. ''' - CHROM = dict_lines["CHROM"][0] - POS = dict_lines["POS"][0] - ID = dict_lines["ID"][0] - # If no consecutive, process one line - if int(num_collapse) == 1: - REF = str(dict_lines["REF"][0]) - ALT = str(dict_lines["ALT"][0]) - # If two consecutive process two lines and write one. - elif int(num_collapse) == 2: - REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1]) - ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1]) - # If three consecutive process three lines and write one - elif int(num_collapse) == 3: - REF = str(dict_lines["REF"][0]) + str(dict_lines["REF"][1]) + str(dict_lines["REF"][2]) - ALT = str(dict_lines["ALT"][0]) + str(dict_lines["ALT"][1]) + str(dict_lines["ALT"][2]) - ## TODO Check how much differences we found among DPs in the three positions of a codon. - REF_DP = dict_lines["REF_DP"][0] - REF_RV = dict_lines["REF_RV"][0] - ALT_DP = dict_lines["ALT_DP"][0] - ALT_RV = dict_lines["ALT_RV"][0] - QUAL = dict_lines["QUAL"][0] - REF_CODON = REF - ALT_CODON = ALT - FILTER =dict_lines["FILTER"][0] - # INFO DP depends on the decision in the todo above. SB is left with the first one. - INFO = dict_lines["INFO"][0] - FORMAT = dict_lines["FORMAT"][0] - # sample depends on the decision in the todo above. 
- SAMPLE = dict_lines["SAMPLE"][0] - return CHROM,POS,ID,REF,ALT,QUAL,FILTER,INFO,FORMAT,SAMPLE + key_list = ["chrom", "pos", "id", "qual", "info", "format"] + chrom, pos, id, qual, filter, info, format = x for key in key_list next(iter(variants))[key] + # chrom = next(iter(variants))["chrom"] + # pos = next(iter(variants))["pos"] + # id = next(iter(variants))["id"] + # ref_dp = next(iter(variants))["ref_dp"] + # ref_rv = next(iter(variants))["ref_rv"] + # alt_dp = next(iter(variants))["alt_dp"] + # alt_rv = next(iter(variants))["alt_rv"] + # qual = next(iter(variants))["qual"] + # filter = next(iter(variants))["filter"] + # # INFO DP depends on the decision in the todo above. SB is left with the first one. + # info = next(iter(variants))["info"] + # format = next(iter(variants))["format"] + + # If no consecutive, process one variant line + # If two consecutive, process two variant lines into one + # If three consecutive process three variant lines and write one + for i in range(num_collapse): + ref += next(iter(variants))["ref"] + alt += next(iter(variants))["alt"] + + return chrom, pos, id, ref, alt, qual, filter, info, format, sample def make_dir(path): @@ -217,22 +170,49 @@ def make_dir(path): if exception.errno != errno.EEXIST: raise - -def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequency=0, ignore_strand_bias=False, ignore_merge_codons=False): - ''' - Description: - Main function to convert iVar variants TSV to VCF. - Input: - file_in : iVar variants TSV file - file_out : VCF output file - pass_only : Only keep variants that PASS filter [True, False] - min_allele_freq : Minimum allele frequency to keep a variant [0] - ignore_strand_bias : Do not apply strand-bias filter [True, False] - ignore_merge_codons : Do not take into account consecutive positions belong to the same codon. - Returns: - None - ''' - +def parse_ivar_line(line): + if not re.match("REGION", line): + line = re.split("\t", line) + + ## Assign intial fields to variables + CHROM = line[0] + POS = line[1] + ID = "." + REF = line[2] + ALT = line[3] + + ## REF/ALF depths + REF_DP = int(line[4]) + REF_RV = int(line[5]) + REF_FW = REF_DP - REF_RV + ALT_RV = int(line[8]) + ALT_DP = int(line[7]) + ALT_FW = ALT_DP - ALT_RV + FORMAT= [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + + ## Codon annotation + REF_CODON = line[15] + ALT_CODON = line[17] + + ## Determine variant type + var_type = "SNP" + if ALT[0] == "+": + ALT = REF + ALT[1:] + var_type = "INS" + elif ALT[0] == "-": + REF += ALT[1:] + ALT = line[2] + var_type = "DEL" + + QUAL = "." 
+ + ## Determine FILTER field + INFO = f"DP={line[11]}" + pass_test = line[13] + + return CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type + +def write_vcf_header(ref_len,ignore_strand_bias): ## Define VCF header header_source = [ "##fileformat=VCFv4.2", @@ -266,139 +246,128 @@ def ivar_variants_to_vcf(file_in, file_out, pass_only=False, min_allele_frequenc '##FILTER=' ] header = header_source + header_info + header_filter + header_format + header_cols - - ## Initialise variables - var_list = [] - var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} - dict_lines = {'CHROM':[], 'POS':[], 'ID':[], 'REF':[], 'ALT':[], 'REF_DP':[], 'REF_RV':[], 'ALT_DP':[], 'ALT_RV':[], 'QUAL':[], 'REF_CODON':[], 'ALT_CODON':[], 'FILTER': [], 'INFO':[], 'FORMAT':[], 'SAMPLE':[]} - write_line = False fout = open(file_out, "w") fout.write('\n'.join(header) + '\n') - with open(file_in, 'r') as fin: - for line in fin: - if not re.match("REGION", line): - line = re.split("\t", line) - - ## Assign intial fields to variables - CHROM = line[0] - POS = line[1] - ID = "." - REF = line[2] - ALT = line[3] - - ## REF/ALF depths - REF_DP = int(line[4]) - REF_RV = int(line[5]) - REF_FW = REF_DP - REF_RV - ALT_RV = int(line[8]) - ALT_DP = int(line[7]) - ALT_FW = ALT_DP - ALT_RV - - ## Perform a fisher_exact test for strand bias detection - table = np.array([[REF_FW, REF_RV], [ALT_FW, ALT_RV]]) - oddsr, pvalue = fisher_exact(table, alternative='greater') - - ## Determine variant type - var_type = "SNP" - if ALT[0] == "+": - ALT = REF + ALT[1:] - var_type = "INS" - elif ALT[0] == "-": - REF += ALT[1:] - ALT = line[2] - var_type = "DEL" - - QUAL = "." - - ## Determine FILTER field - INFO = f"DP={line[11]}" - pass_test = line[13] - if ignore_strand_bias: - if pass_test == "TRUE": - FILTER = "PASS" - else: - FILTER = "ft" - else: - ## Add SB in the FILTER field if strand-bias p-value is significant - if pvalue < 0.05 and pass_test == "TRUE": - FILTER = "sb" - elif pvalue > 0.05 and pass_test == "TRUE": - FILTER = "PASS" - elif pvalue <= 0.05 and pass_test == "FALSE": - FILTER = "ft;sb" - else: - FILTER = "ft" - INFO += f":SB_PV={str(round(pvalue, 5))}" - - FORMAT = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" - SAMPLE = f'1:{":".join(line[4:11])}' - - REF_CODON = line[15] - ALT_CODON = line[17] - param_list = [CHROM, POS, ID, REF, ALT, REF_DP, REF_RV, ALT_DP, ALT_RV, QUAL, REF_CODON, ALT_CODON, FILTER, INFO, FORMAT, SAMPLE] - - if ignore_merge_codons or var_type != "SNP": - write_line = True - oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n") - - else: - ## Determine whether to output variant - if pass_only and FILTER != "PASS": - write_line = False - if float(line[10]) < min_allele_frequency: - write_line = False - if (CHROM, POS, REF, ALT) in var_list: - write_line = False - else: - var_list.append((CHROM, POS, REF, ALT)) - - ## Write to file - if write_line: - var_count_dict[var_type] += 1 - fout.write(oline) - - ## Print variant counts to pass to MultiQC - var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] - print("\t".join(["sample"] + [x[0] for x in var_count_list])) - print("\t".join([filename] + [x[1] for x in var_count_list])) - - ## Handle last 3 lines. 
-        if len(dict_lines["POS"]) == 2:
-            if check_consecutive(dict_lines["POS"]) == 2:
-                if codon_position(dict_lines["REF_CODON"][0],dict_lines["ALT_CODON"][0]) != 2:
-                    write_line = True
-                    num_collapse = 2
-                    CHROM, POS, ID, REF, ALT, QUAL, FILTER, INFO, FORMAT, SAMPLE = rename_vars(dict_lines, num_collapse)
-                    oline = (CHROM + "\t" + POS + "\t" + ID + "\t" + REF + "\t" + ALT + "\t" + QUAL + "\t" + FILTER + "\t" + INFO + "\t" + FORMAT + "\t" + SAMPLE + "\n")
-                    fout.write(oline)
-                else:
-                    oline = (dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n")
-                    oline1 = (dict_lines["CHROM"][1] + "\t" + dict_lines["POS"][1] + "\t" + dict_lines["ID"][1] + "\t" + dict_lines["REF"][1] + "\t" + dict_lines["ALT"][1] + "\t" + dict_lines["QUAL"][1] + "\t" + dict_lines["FILTER"][1] + "\t" + dict_lines["INFO"][1] + "\t" + dict_lines["FORMAT"][1] + "\t" + dict_lines["SAMPLE"][1] + "\n")
-                    fout.write(oline)
-                    fout.write(oline1)
-        elif len(dict_lines["POS"]) == 1:
-            oline =(dict_lines["CHROM"][0] + "\t" + dict_lines["POS"][0] + "\t" + dict_lines["ID"][0] + "\t" + dict_lines["REF"][0] + "\t" + dict_lines["ALT"][0] + "\t" + dict_lines["QUAL"][0] + "\t" + dict_lines["FILTER"][0] + "\t" + dict_lines["INFO"][0] + "\t" + dict_lines["FORMAT"][0] + "\t" + dict_lines["SAMPLE"][0] + "\n")
-            fout.write(oline)
    fout.close()

+def write_vcf_line(chrom, pos, id , ref, alt, filter, qual, info, format):
+    FORMAT = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ"
+    SAMPLE = f'1:{":".join(format)}'
+    oline= chrom + "\t" + pos + "\t" + id + "\t" + ref + "\t" + alt + "\t" + qual + "\t" + filter + "\t" + info + "\t" + format + "\t" + sample + "\n"
+    fout = open(file_out, "a")
+    fout.write(oline)
+
+def ivar_filter(pass_test):
+    if pass_test:
+        return False
+    else:
+        return "ft"
+
+def strand_bias_filter(format):
+    # format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW]
+    # table:
+    ## REF_FW  REF_RV
+    ## ALT_FW  ALT_RV
+    table = np.array([[format[2], format[1]], [format[5], format[4]]])
+    oddsr, pvalue = fisher_exact(table, alternative='greater')
+
+    # h0: both strands are equally represented. If the test is significant, h0 is rejected, so there is a strand bias.
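+    # Illustrative counts rather than real data: REF split 50 forward / 50
+    # reverse with ALT split 1 forward / 30 reverse gives a one-sided p-value
+    # far below 0.05, so the variant would be flagged "sb".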
+ if pvalue < 0.05: + return "sb" + else: + return False def main(args=None): args = parse_args(args) - ## Create output directory filename = os.path.splitext(file_in)[0] out_dir = os.path.dirname(file_out) + + ## Create output directory make_dir(out_dir) - ivar_variants_to_vcf( - args.file_in, - args.file_out, - args.pass_only, - args.allele_freq_threshold, - args.ignore_strand_bias, - args.ignore_merge_codons, - ) + # Initialize vars + var_list = [] + var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} + variants = OrderedDict() + q_pos = queue.Queue(maxsize=3) + + with open(file_in, 'r') as fin: + for line in fin: + # Parse line + ## format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + write_line = True + chrom, pos, id, ref, alt, qual, format, info, ref_codon, alt_codon, pass_test, var_type = parse_ivar_line(line) + + # Process filters + ## ivar fisher test + filter = ivar_filter(pass_test) + ## strand-bias fisher test + if not ignore_strand_bias: + filter += ",".join(strand_bias_filter(FORMAT)) + + if not filter: + filter = "PASS" + + ## Write output to vcf file + ### Filter variants + if pass_only and filter != "PASS": + write_line = False + ### AF filtering. ALT_DP/(ALT_DP+REF_DP) + if float(format[3]/(format[0]+format[3])) < min_allele_frequency: + write_line = False + ### Duplication filter + if (CHROM, POS, REF, ALT) in var_list: + write_line = False + else: + var_list.append((CHROM, POS, REF, ALT)) + + ## Merge consecutive variants belonging to the same codon + if not ignore_merge_codons and var_type == "SNP": + if q_pos.full(): + fe_codon_ref = next(iter(variants))["ref_codon"] + fe_codon_alt = next(iter(variants))["alt_codon"] + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + chrom, pos, id, ref, alt, qual, format, info, ref_codon, alt_codon, pass_test, var_type = process_variants(variants,num_collapse) + + ## Empty variants dict and queue accordingly + for i in range(num_collapse): + variants.popitem() + q_pos.get() + else: + write_line = False + q_pos = q_pos.put(pos) + variants[(chrom, pos, ref, alt)] = {"chrom": chrom, + "pos": pos, + "id": id, + "ref": ref, + "alt": alt, + "qual": qual, + "format": format, + "info": info, + "ref_codon": ref_codon, + "alt_codon": alt_codon, + "pass_test": pass_test, + "var_type": var_type + } + + + + ## Write to file + write_vcf_header(ignore_strand_bias) + if write_line: + var_count_dict[var_type] += 1 + write_vcf_line(chrom, pos, id, ref, alt, filter, qual, format, info) + + if not ignore_merge_codons: + ## handle last lines + while not q_pos.empty(): + ## Print variant counts to pass to MultiQC + var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] + print("\t".join(["sample"] + [x[0] for x in var_count_list])) + print("\t".join([filename] + [x[1] for x in var_count_list])) + if __name__ == "__main__": sys.exit(main()) From fe8adb2b976a076bf57ce4bfa73e9f0c2c300c9c Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 01:59:28 +0200 Subject: [PATCH 13/58] vcf conversion and strand bias working --- bin/ivar_variants_to_vcf.py | 323 +++++++++++++++++++++++------------- 1 file changed, 210 insertions(+), 113 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 53dd8321..dbf9968b 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -11,6 +11,7 @@ import numpy as np from scipy.stats import fisher_exact + def parse_args(args=None): Description = "Convert iVar variants TSV file to VCF format." 
Epilog = """Example usage: python ivar_variants_to_vcf.py """ @@ -36,45 +37,45 @@ def parse_args(args=None): "--ignore_strand_bias", default=False, help="Does not take strand bias into account, use this option when not using amplicon sequencing.", - action="store_true" + action="store_true", ) parser.add_argument( "-ic", "--ignore_merge_codons", help="Output variants without taking into account if consecutive positions belong to the same codon.", - action="store_true" + action="store_true", ) return parser.parse_args(args) def check_consecutive(mylist): - ''' + """ Description: This function checks if a list of three or two numbers are consecutive and returns how many items are consecutive. input: my_list - A list of integers return: Number of items consecutive in the list - [False, 1, 2] - ''' + """ my_list = list(map(int, mylist)) ## Check if the list contains consecutive numbers - if sorted(my_list) == list(range(min(my_list), max(my_list)+1)): + if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): return len(my_list) else: ## If not, and the list is > 1, remove the last item and reevaluate. if len(my_list) > 1: my_list.pop() - if sorted(my_list) == list(range(min(my_list), max(my_list)+1)): + if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): return len(my_list) else: return False return False -def get_diff_position(seq1,seq2): - ''' +def get_diff_position(seq1, seq2): + """ Description: Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs. Input: @@ -82,22 +83,23 @@ def get_diff_position(seq1,seq2): seq2 - string size 3 [A,T,C,G]. Ex. "ACC" Returns: Returns position where seq1 != seq2 - ''' + """ if seq1 == "NA": return False ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]] if len(ind_diff) > 1: - print("There has been an issue, more than one difference between the seqs.") - return False + print("There has been an issue, more than one difference between the seqs.") + return False else: return ind_diff[0] + def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): # Are two positions in the dict consecutive? if check_consecutive(list(q_pos)) == 2: ## If the first position is not on the third position of the codon they are in the same codon. - if codon_position(fe_codon_ref,fe_codon_alt) != 2: + if codon_position(fe_codon_ref, fe_codon_alt) != 2: num_collapse = 2 else: num_collapse = 1 @@ -105,22 +107,23 @@ def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): elif check_consecutive(list(q_pos)) == 3: ## we check the first position in which codon position is to process it acordingly. # If first position is in the first codon position all three positions belong to the same codon. - if codon_position(fe_codon_ref,fe_codon_alt) == 0: + if codon_position(fe_codon_ref, fe_codon_alt) == 0: num_collapse = 3 # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. - elif codon_position(fe_codon_ref,fe_codon_alt) == 1: + elif codon_position(fe_codon_ref, fe_codon_alt) == 1: num_collapse = 2 ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. - elif codon_position(fe_codon_ref,fe_codon_alt) == 2: + elif codon_position(fe_codon_ref, fe_codon_alt) == 2: num_collapse = 1 - + # If no consecutive process only one line. 
elif check_consecutive(list(q_pos)) == False: num_collapse = 1 return num_collapse -def process_variants(variants,num_collapse): - ''' + +def process_variants(variants, num_collapse): + """ Description: The function set the vars acordingly to the lines to collapse do to consecutive variants. Input: @@ -128,9 +131,11 @@ def process_variants(variants,num_collapse): num_collapse - number of lines to collapse [2,3] Returns:: Vars fixed. - ''' + """ key_list = ["chrom", "pos", "id", "qual", "info", "format"] - chrom, pos, id, qual, filter, info, format = x for key in key_list next(iter(variants))[key] + chrom, pos, id, qual, filter, info, format = [ + next(iter(variants))[key] for key in key_list + ] # chrom = next(iter(variants))["chrom"] # pos = next(iter(variants))["pos"] # id = next(iter(variants))["id"] @@ -155,14 +160,14 @@ def process_variants(variants,num_collapse): def make_dir(path): - ''' + """ Description: Create directory if it doesn't exist. Input: path - path where the directory will be created. Returns: None - ''' + """ if not len(path) == 0: try: os.makedirs(path) @@ -170,8 +175,8 @@ def make_dir(path): if exception.errno != errno.EEXIST: raise + def parse_ivar_line(line): - if not re.match("REGION", line): line = re.split("\t", line) ## Assign intial fields to variables @@ -188,7 +193,7 @@ def parse_ivar_line(line): ALT_RV = int(line[8]) ALT_DP = int(line[7]) ALT_FW = ALT_DP - ALT_RV - FORMAT= [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + FORMAT = [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] ## Codon annotation REF_CODON = line[15] @@ -210,20 +215,29 @@ def parse_ivar_line(line): INFO = f"DP={line[11]}" pass_test = line[13] - return CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type - -def write_vcf_header(ref_len,ignore_strand_bias): + return ( + CHROM, + POS, + ID, + REF, + ALT, + QUAL, + INFO, + FORMAT, + REF_CODON, + ALT_CODON, + pass_test, + var_type + ) + + +def write_vcf_header(ref_len, ignore_strand_bias, file_out, filename): ## Define VCF header - header_source = [ - "##fileformat=VCFv4.2", - "##source=iVar" - ] - header_info = [ - '##INFO=' - ] + header_source = ["##fileformat=VCFv4.2", "##source=iVar"] + header_info = ['##INFO='] header_filter = [ '##FILTER=', - '##FILTER= 0.05">' + '##FILTER= 0.05">', ] header_format = [ '##FORMAT=', @@ -235,9 +249,7 @@ def write_vcf_header(ref_len,ignore_strand_bias): '##FORMAT=', '##FORMAT=', ] - header_cols = [ - f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}" - ] + header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}"] if not ignore_strand_bias: header_info += [ '##INFO=' @@ -247,15 +259,39 @@ def write_vcf_header(ref_len,ignore_strand_bias): ] header = header_source + header_info + header_filter + header_format + header_cols fout = open(file_out, "w") - fout.write('\n'.join(header) + '\n') + fout.write("\n".join(header) + "\n") fout.close() -def write_vcf_line(chrom, pos, id , ref, alt, filter, qual, info, format): - FORMAT = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" - SAMPLE = f'1:{":".join(format)}' - oline= chrom + "\t" + pos + "\t" + id + "\t" + ref + "\t" + alt + "\t" + qual + "\t" + filter + "\t" + info + "\t" + format + "\t" + sample + "\n" + +def write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, file_out): + sample = f'1:{":".join(str(x) for x in format)}' + format = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" + + oline = ( + chrom + + "\t" + + pos + + "\t" + + id + + 
"\t" + + ref + + "\t" + + alt + + "\t" + + qual + + "\t" + + filter + + "\t" + + info + + "\t" + + format + + "\t" + + sample + + "\n" + ) fout = open(file_out, "a") fout.write(oline) + fout.close() def ivar_filter(pass_test): if pass_test: @@ -263,13 +299,14 @@ def ivar_filter(pass_test): else: return "ft" + def strand_bias_filter(format): # format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] # table: ## REF_FW REF_RV ## ALT_FW ALT_RV - table = np.array([[format[2], format[1]], [format[5], format[4]]]) - oddsr, pvalue = fisher_exact(table, alternative='greater') + table = np.array([ [format[2], format[1]], [format[5], format[4]] ]) + oddsr, pvalue = fisher_exact(table, alternative="greater") # h0: both strands are equally represented. If test is significant h0 is refused so there is an strand bias. if pvalue < 0.05: @@ -277,11 +314,12 @@ def strand_bias_filter(format): else: return False + def main(args=None): args = parse_args(args) - filename = os.path.splitext(file_in)[0] - out_dir = os.path.dirname(file_out) + filename = os.path.splitext(args.file_in)[0] + out_dir = os.path.dirname(args.file_out) ## Create output directory make_dir(out_dir) @@ -292,82 +330,141 @@ def main(args=None): variants = OrderedDict() q_pos = queue.Queue(maxsize=3) - with open(file_in, 'r') as fin: + ## Write header to file + write_vcf_header(29990,args.ignore_strand_bias,args.file_out,filename) + + with open(args.file_in, "r") as fin: for line in fin: - # Parse line - ## format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] - write_line = True - chrom, pos, id, ref, alt, qual, format, info, ref_codon, alt_codon, pass_test, var_type = parse_ivar_line(line) - - # Process filters - ## ivar fisher test - filter = ivar_filter(pass_test) - ## strand-bias fisher test - if not ignore_strand_bias: - filter += ",".join(strand_bias_filter(FORMAT)) - - if not filter: - filter = "PASS" - - ## Write output to vcf file - ### Filter variants - if pass_only and filter != "PASS": - write_line = False - ### AF filtering. 
ALT_DP/(ALT_DP+REF_DP) - if float(format[3]/(format[0]+format[3])) < min_allele_frequency: - write_line = False - ### Duplication filter - if (CHROM, POS, REF, ALT) in var_list: - write_line = False - else: - var_list.append((CHROM, POS, REF, ALT)) - - ## Merge consecutive variants belonging to the same codon - if not ignore_merge_codons and var_type == "SNP": - if q_pos.full(): - fe_codon_ref = next(iter(variants))["ref_codon"] - fe_codon_alt = next(iter(variants))["alt_codon"] - num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) - chrom, pos, id, ref, alt, qual, format, info, ref_codon, alt_codon, pass_test, var_type = process_variants(variants,num_collapse) - - ## Empty variants dict and queue accordingly - for i in range(num_collapse): - variants.popitem() - q_pos.get() - else: + if not re.match("REGION", line): + # Parse line + ## format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + write_line = True + ( + chrom, + pos, + id, + ref, + alt, + qual, + info, + format, + ref_codon, + alt_codon, + pass_test, + var_type, + ) = parse_ivar_line(line) + + # Process filters + ## ivar fisher test + filter = "" + if ivar_filter(pass_test): + filter = ivar_filter(pass_test) + print(filter) + ## strand-bias fisher test + if not args.ignore_strand_bias: + if strand_bias_filter(format): + if filter: + filter += "," + strand_bias_filter(format) + else: + filter = strand_bias_filter(format) + + if not filter: + filter = "PASS" + + ### Filter variants + if args.pass_only and filter != "PASS": + write_line = False + ### AF filtering. ALT_DP/(ALT_DP+REF_DP) + if float(format[3] / (format[0] + format[3])) < args.allele_freq_threshold: + write_line = False + ### Duplication filter + if (chrom, pos, ref, alt) in var_list: write_line = False + else: + var_list.append((chrom, pos, ref, alt)) + + ## Merge consecutive variants belonging to the same codon + if not args.ignore_merge_codons and var_type == "SNP": + if q_pos.full(): + fe_codon_ref = next(iter(variants))["ref_codon"] + fe_codon_alt = next(iter(variants))["alt_codon"] + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + ( + chrom, + pos, + id, + ref, + alt, + qual, + format, + info, + ref_codon, + alt_codon, + pass_test, + var_type, + ) = process_variants(variants, num_collapse) + + ## Empty variants dict and queue accordingly + for i in range(num_collapse): + variants.popitem() + q_pos.get() + else: + write_line = False + + ## re-fill queue accordingly q_pos = q_pos.put(pos) - variants[(chrom, pos, ref, alt)] = {"chrom": chrom, - "pos": pos, - "id": id, - "ref": ref, - "alt": alt, - "qual": qual, - "format": format, - "info": info, - "ref_codon": ref_codon, - "alt_codon": alt_codon, - "pass_test": pass_test, - "var_type": var_type - } - - - - ## Write to file - write_vcf_header(ignore_strand_bias) - if write_line: - var_count_dict[var_type] += 1 - write_vcf_line(chrom, pos, id, ref, alt, filter, qual, format, info) - - if not ignore_merge_codons: + variants[(chrom, pos, ref, alt)] = { + "chrom": chrom, + "pos": pos, + "id": id, + "ref": ref, + "alt": alt, + "qual": qual, + "format": format, + "info": info, + "ref_codon": ref_codon, + "alt_codon": alt_codon, + "pass_test": pass_test, + "var_type": var_type, + } + + ## Write output to vcf file + if write_line: + var_count_dict[var_type] += 1 + write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out) + + if not args.ignore_merge_codons: ## handle last lines while not q_pos.empty(): - + fe_codon_ref = next(iter(variants))["ref_codon"] 
+ fe_codon_alt = next(iter(variants))["alt_codon"] + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + ( + chrom, + pos, + id, + ref, + alt, + qual, + format, + info, + ref_codon, + alt_codon, + pass_test, + var_type, + ) = process_variants(variants, num_collapse) + var_count_dict[var_type] += 1 + write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format) + ## Empty variants dict and queue accordingly + for i in range(num_collapse): + variants.popitem() + q_pos.get() ## Print variant counts to pass to MultiQC var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] print("\t".join(["sample"] + [x[0] for x in var_count_list])) print("\t".join([filename] + [x[1] for x in var_count_list])) + if __name__ == "__main__": sys.exit(main()) From 2f14d0687de65ac64367005eb34e64f56762417e Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 16:47:33 +0200 Subject: [PATCH 14/58] fixed bugs, finished first debugging --- bin/ivar_variants_to_vcf.py | 111 +++++++++++++++++++----------------- 1 file changed, 60 insertions(+), 51 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index dbf9968b..3e08962d 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -5,8 +5,9 @@ import re import errno import argparse -import queue +#import queue from collections import OrderedDict +from collections import deque import numpy as np from scipy.stats import fisher_exact @@ -59,13 +60,18 @@ def check_consecutive(mylist): Number of items consecutive in the list - [False, 1, 2] """ my_list = list(map(int, mylist)) - + print("LIST INSIDE CHECKCONSECUTIVE: ") + print(my_list) ## Check if the list contains consecutive numbers - if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + if len(my_list) == 1: + return False + elif sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + print(my_list) return len(my_list) else: ## If not, and the list is > 1, remove the last item and reevaluate. - if len(my_list) > 1: + if len(my_list) > 2: + print(my_list) my_list.pop() if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): return len(my_list) @@ -97,9 +103,11 @@ def get_diff_position(seq1, seq2): def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): # Are two positions in the dict consecutive? + print("CHECK CONSECUTIVE: " + str(check_consecutive(list(q_pos)))) if check_consecutive(list(q_pos)) == 2: + print("GETDIFFPOS WHEN TWO CONSECUTIVE " + str(get_diff_position(fe_codon_ref, fe_codon_alt))) ## If the first position is not on the third position of the codon they are in the same codon. - if codon_position(fe_codon_ref, fe_codon_alt) != 2: + if get_diff_position(fe_codon_ref, fe_codon_alt) != 2: num_collapse = 2 else: num_collapse = 1 @@ -107,13 +115,14 @@ def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): elif check_consecutive(list(q_pos)) == 3: ## we check the first position in which codon position is to process it acordingly. # If first position is in the first codon position all three positions belong to the same codon. - if codon_position(fe_codon_ref, fe_codon_alt) == 0: + print("GETDIFFPOS WHEN THREE CONSECUTIVE " + str(get_diff_position(fe_codon_ref, fe_codon_alt))) + if get_diff_position(fe_codon_ref, fe_codon_alt) == 0: num_collapse = 3 # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. 
- elif codon_position(fe_codon_ref, fe_codon_alt) == 1: + elif get_diff_position(fe_codon_ref, fe_codon_alt) == 1: num_collapse = 2 ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. - elif codon_position(fe_codon_ref, fe_codon_alt) == 2: + elif get_diff_position(fe_codon_ref, fe_codon_alt) == 2: num_collapse = 1 # If no consecutive process only one line. elif check_consecutive(list(q_pos)) == False: @@ -132,9 +141,9 @@ def process_variants(variants, num_collapse): Returns:: Vars fixed. """ - key_list = ["chrom", "pos", "id", "qual", "info", "format"] + key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] chrom, pos, id, qual, filter, info, format = [ - next(iter(variants))[key] for key in key_list + variants[next(iter(variants))][key] for key in key_list ] # chrom = next(iter(variants))["chrom"] # pos = next(iter(variants))["pos"] @@ -152,11 +161,13 @@ def process_variants(variants, num_collapse): # If no consecutive, process one variant line # If two consecutive, process two variant lines into one # If three consecutive process three variant lines and write one + ref = "" + alt = "" for i in range(num_collapse): - ref += next(iter(variants))["ref"] - alt += next(iter(variants))["alt"] + ref += variants[next(iter(variants))]["ref"] + alt += variants[next(iter(variants))]["alt"] - return chrom, pos, id, ref, alt, qual, filter, info, format, sample + return chrom, pos, id, ref, alt, qual, filter, info, format def make_dir(path): @@ -328,7 +339,7 @@ def main(args=None): var_list = [] var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} variants = OrderedDict() - q_pos = queue.Queue(maxsize=3) + q_pos = deque([],maxlen=3) ## Write header to file write_vcf_header(29990,args.ignore_strand_bias,args.file_out,filename) @@ -385,9 +396,29 @@ def main(args=None): ## Merge consecutive variants belonging to the same codon if not args.ignore_merge_codons and var_type == "SNP": - if q_pos.full(): - fe_codon_ref = next(iter(variants))["ref_codon"] - fe_codon_alt = next(iter(variants))["alt_codon"] + + ## re-fill queue accordingly + q_pos.append(pos) + variants[(chrom, pos, ref, alt)] = { + "chrom": chrom, + "pos": pos, + "id": id, + "ref": ref, + "alt": alt, + "qual": qual, + "filter": filter, + "info": info, + "format": format, + "ref_codon": ref_codon, + "alt_codon": alt_codon + } + + if len(q_pos) == q_pos.maxlen : + print(q_pos) + print(variants) + print("longitud cola:" + str(len(q_pos))) + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) ( chrom, @@ -396,38 +427,18 @@ def main(args=None): ref, alt, qual, - format, + filter, info, - ref_codon, - alt_codon, - pass_test, - var_type, + format ) = process_variants(variants, num_collapse) ## Empty variants dict and queue accordingly for i in range(num_collapse): - variants.popitem() - q_pos.get() + variants.popitem(last=False) + q_pos.popleft() else: write_line = False - ## re-fill queue accordingly - q_pos = q_pos.put(pos) - variants[(chrom, pos, ref, alt)] = { - "chrom": chrom, - "pos": pos, - "id": id, - "ref": ref, - "alt": alt, - "qual": qual, - "format": format, - "info": info, - "ref_codon": ref_codon, - "alt_codon": alt_codon, - "pass_test": pass_test, - "var_type": var_type, - } - ## Write output to vcf file if write_line: var_count_dict[var_type] += 1 @@ -435,9 +446,9 @@ def 
main(args=None): if not args.ignore_merge_codons: ## handle last lines - while not q_pos.empty(): - fe_codon_ref = next(iter(variants))["ref_codon"] - fe_codon_alt = next(iter(variants))["alt_codon"] + while len(q_pos) > 0: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) ( chrom, @@ -446,19 +457,17 @@ def main(args=None): ref, alt, qual, - format, + filter, info, - ref_codon, - alt_codon, - pass_test, - var_type, + format ) = process_variants(variants, num_collapse) + var_count_dict[var_type] += 1 - write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format) + write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out) ## Empty variants dict and queue accordingly for i in range(num_collapse): variants.popitem() - q_pos.get() + q_pos.pop() ## Print variant counts to pass to MultiQC var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] From 15148989a40b5823971669fb77467fbb99786bca Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 16:48:14 +0200 Subject: [PATCH 15/58] black linting --- bin/ivar_variants_to_vcf.py | 172 ++++++++++++++++++++---------------- 1 file changed, 95 insertions(+), 77 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 3e08962d..427dc1d0 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -5,7 +5,8 @@ import re import errno import argparse -#import queue + +# import queue from collections import OrderedDict from collections import deque @@ -105,7 +106,10 @@ def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): # Are two positions in the dict consecutive? print("CHECK CONSECUTIVE: " + str(check_consecutive(list(q_pos)))) if check_consecutive(list(q_pos)) == 2: - print("GETDIFFPOS WHEN TWO CONSECUTIVE " + str(get_diff_position(fe_codon_ref, fe_codon_alt))) + print( + "GETDIFFPOS WHEN TWO CONSECUTIVE " + + str(get_diff_position(fe_codon_ref, fe_codon_alt)) + ) ## If the first position is not on the third position of the codon they are in the same codon. if get_diff_position(fe_codon_ref, fe_codon_alt) != 2: num_collapse = 2 @@ -115,7 +119,10 @@ def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): elif check_consecutive(list(q_pos)) == 3: ## we check the first position in which codon position is to process it acordingly. # If first position is in the first codon position all three positions belong to the same codon. - print("GETDIFFPOS WHEN THREE CONSECUTIVE " + str(get_diff_position(fe_codon_ref, fe_codon_alt))) + print( + "GETDIFFPOS WHEN THREE CONSECUTIVE " + + str(get_diff_position(fe_codon_ref, fe_codon_alt)) + ) if get_diff_position(fe_codon_ref, fe_codon_alt) == 0: num_collapse = 3 # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. @@ -188,58 +195,58 @@ def make_dir(path): def parse_ivar_line(line): - line = re.split("\t", line) - - ## Assign intial fields to variables - CHROM = line[0] - POS = line[1] - ID = "." 
- REF = line[2] - ALT = line[3] - - ## REF/ALF depths - REF_DP = int(line[4]) - REF_RV = int(line[5]) - REF_FW = REF_DP - REF_RV - ALT_RV = int(line[8]) - ALT_DP = int(line[7]) - ALT_FW = ALT_DP - ALT_RV - FORMAT = [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] - - ## Codon annotation - REF_CODON = line[15] - ALT_CODON = line[17] - - ## Determine variant type - var_type = "SNP" - if ALT[0] == "+": - ALT = REF + ALT[1:] - var_type = "INS" - elif ALT[0] == "-": - REF += ALT[1:] - ALT = line[2] - var_type = "DEL" - - QUAL = "." - - ## Determine FILTER field - INFO = f"DP={line[11]}" - pass_test = line[13] - - return ( - CHROM, - POS, - ID, - REF, - ALT, - QUAL, - INFO, - FORMAT, - REF_CODON, - ALT_CODON, - pass_test, - var_type - ) + line = re.split("\t", line) + + ## Assign intial fields to variables + CHROM = line[0] + POS = line[1] + ID = "." + REF = line[2] + ALT = line[3] + + ## REF/ALF depths + REF_DP = int(line[4]) + REF_RV = int(line[5]) + REF_FW = REF_DP - REF_RV + ALT_RV = int(line[8]) + ALT_DP = int(line[7]) + ALT_FW = ALT_DP - ALT_RV + FORMAT = [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + + ## Codon annotation + REF_CODON = line[15] + ALT_CODON = line[17] + + ## Determine variant type + var_type = "SNP" + if ALT[0] == "+": + ALT = REF + ALT[1:] + var_type = "INS" + elif ALT[0] == "-": + REF += ALT[1:] + ALT = line[2] + var_type = "DEL" + + QUAL = "." + + ## Determine FILTER field + INFO = f"DP={line[11]}" + pass_test = line[13] + + return ( + CHROM, + POS, + ID, + REF, + ALT, + QUAL, + INFO, + FORMAT, + REF_CODON, + ALT_CODON, + pass_test, + var_type, + ) def write_vcf_header(ref_len, ignore_strand_bias, file_out, filename): @@ -304,6 +311,7 @@ def write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, file_ou fout.write(oline) fout.close() + def ivar_filter(pass_test): if pass_test: return False @@ -316,7 +324,7 @@ def strand_bias_filter(format): # table: ## REF_FW REF_RV ## ALT_FW ALT_RV - table = np.array([ [format[2], format[1]], [format[5], format[4]] ]) + table = np.array([[format[2], format[1]], [format[5], format[4]]]) oddsr, pvalue = fisher_exact(table, alternative="greater") # h0: both strands are equally represented. If test is significant h0 is refused so there is an strand bias. @@ -339,10 +347,10 @@ def main(args=None): var_list = [] var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} variants = OrderedDict() - q_pos = deque([],maxlen=3) + q_pos = deque([], maxlen=3) ## Write header to file - write_vcf_header(29990,args.ignore_strand_bias,args.file_out,filename) + write_vcf_header(29990, args.ignore_strand_bias, args.file_out, filename) with open(args.file_in, "r") as fin: for line in fin: @@ -368,7 +376,7 @@ def main(args=None): # Process filters ## ivar fisher test filter = "" - if ivar_filter(pass_test): + if ivar_filter(pass_test): filter = ivar_filter(pass_test) print(filter) ## strand-bias fisher test @@ -386,7 +394,10 @@ def main(args=None): if args.pass_only and filter != "PASS": write_line = False ### AF filtering. 
ALT_DP/(ALT_DP+REF_DP) - if float(format[3] / (format[0] + format[3])) < args.allele_freq_threshold: + if ( + float(format[3] / (format[0] + format[3])) + < args.allele_freq_threshold + ): write_line = False ### Duplication filter if (chrom, pos, ref, alt) in var_list: @@ -410,16 +421,18 @@ def main(args=None): "info": info, "format": format, "ref_codon": ref_codon, - "alt_codon": alt_codon + "alt_codon": alt_codon, } - if len(q_pos) == q_pos.maxlen : + if len(q_pos) == q_pos.maxlen: print(q_pos) print(variants) print("longitud cola:" + str(len(q_pos))) fe_codon_ref = variants[next(iter(variants))]["ref_codon"] fe_codon_alt = variants[next(iter(variants))]["alt_codon"] - num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + num_collapse = check_merge_codons( + q_pos, fe_codon_ref, fe_codon_alt + ) ( chrom, pos, @@ -429,7 +442,7 @@ def main(args=None): qual, filter, info, - format + format, ) = process_variants(variants, num_collapse) ## Empty variants dict and queue accordingly @@ -442,7 +455,18 @@ def main(args=None): ## Write output to vcf file if write_line: var_count_dict[var_type] += 1 - write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out) + write_vcf_line( + chrom, + pos, + id, + ref, + alt, + filter, + qual, + info, + format, + args.file_out, + ) if not args.ignore_merge_codons: ## handle last lines @@ -450,20 +474,14 @@ def main(args=None): fe_codon_ref = variants[next(iter(variants))]["ref_codon"] fe_codon_alt = variants[next(iter(variants))]["alt_codon"] num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) - ( - chrom, - pos, - id, - ref, - alt, - qual, - filter, - info, - format - ) = process_variants(variants, num_collapse) + (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants( + variants, num_collapse + ) var_count_dict[var_type] += 1 - write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out) + write_vcf_line( + chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out + ) ## Empty variants dict and queue accordingly for i in range(num_collapse): variants.popitem() From 780be420bb9e02af7c8161a4538bca2d048e08b3 Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 17:32:27 +0200 Subject: [PATCH 16/58] comments and function reordering --- bin/ivar_variants_to_vcf.py | 389 +++++++++++++++++++++--------------- 1 file changed, 224 insertions(+), 165 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 427dc1d0..b965a198 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -5,8 +5,6 @@ import re import errno import argparse - -# import queue from collections import OrderedDict from collections import deque @@ -51,132 +49,6 @@ def parse_args(args=None): return parser.parse_args(args) -def check_consecutive(mylist): - """ - Description: - This function checks if a list of three or two numbers are consecutive and returns how many items are consecutive. - input: - my_list - A list of integers - return: - Number of items consecutive in the list - [False, 1, 2] - """ - my_list = list(map(int, mylist)) - print("LIST INSIDE CHECKCONSECUTIVE: ") - print(my_list) - ## Check if the list contains consecutive numbers - if len(my_list) == 1: - return False - elif sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): - print(my_list) - return len(my_list) - else: - ## If not, and the list is > 1, remove the last item and reevaluate. 
- if len(my_list) > 2: - print(my_list) - my_list.pop() - if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): - return len(my_list) - else: - return False - return False - - -def get_diff_position(seq1, seq2): - """ - Description: - Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs. - Input: - seq1 - string size 3 [A,T,C,G]. Ex. "ATC" - seq2 - string size 3 [A,T,C,G]. Ex. "ACC" - Returns: - Returns position where seq1 != seq2 - """ - if seq1 == "NA": - return False - - ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]] - if len(ind_diff) > 1: - print("There has been an issue, more than one difference between the seqs.") - return False - else: - return ind_diff[0] - - -def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): - # Are two positions in the dict consecutive? - print("CHECK CONSECUTIVE: " + str(check_consecutive(list(q_pos)))) - if check_consecutive(list(q_pos)) == 2: - print( - "GETDIFFPOS WHEN TWO CONSECUTIVE " - + str(get_diff_position(fe_codon_ref, fe_codon_alt)) - ) - ## If the first position is not on the third position of the codon they are in the same codon. - if get_diff_position(fe_codon_ref, fe_codon_alt) != 2: - num_collapse = 2 - else: - num_collapse = 1 - # Are the three positions in the dict consecutive? - elif check_consecutive(list(q_pos)) == 3: - ## we check the first position in which codon position is to process it acordingly. - # If first position is in the first codon position all three positions belong to the same codon. - print( - "GETDIFFPOS WHEN THREE CONSECUTIVE " - + str(get_diff_position(fe_codon_ref, fe_codon_alt)) - ) - if get_diff_position(fe_codon_ref, fe_codon_alt) == 0: - num_collapse = 3 - # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. - elif get_diff_position(fe_codon_ref, fe_codon_alt) == 1: - num_collapse = 2 - ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. - elif get_diff_position(fe_codon_ref, fe_codon_alt) == 2: - num_collapse = 1 - # If no consecutive process only one line. - elif check_consecutive(list(q_pos)) == False: - num_collapse = 1 - - return num_collapse - - -def process_variants(variants, num_collapse): - """ - Description: - The function set the vars acordingly to the lines to collapse do to consecutive variants. - Input: - variants - Dict with var lines. - num_collapse - number of lines to collapse [2,3] - Returns:: - Vars fixed. - """ - key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] - chrom, pos, id, qual, filter, info, format = [ - variants[next(iter(variants))][key] for key in key_list - ] - # chrom = next(iter(variants))["chrom"] - # pos = next(iter(variants))["pos"] - # id = next(iter(variants))["id"] - # ref_dp = next(iter(variants))["ref_dp"] - # ref_rv = next(iter(variants))["ref_rv"] - # alt_dp = next(iter(variants))["alt_dp"] - # alt_rv = next(iter(variants))["alt_rv"] - # qual = next(iter(variants))["qual"] - # filter = next(iter(variants))["filter"] - # # INFO DP depends on the decision in the todo above. SB is left with the first one. 
- # info = next(iter(variants))["info"] - # format = next(iter(variants))["format"] - - # If no consecutive, process one variant line - # If two consecutive, process two variant lines into one - # If three consecutive process three variant lines and write one - ref = "" - alt = "" - for i in range(num_collapse): - ref += variants[next(iter(variants))]["ref"] - alt += variants[next(iter(variants))]["alt"] - - return chrom, pos, id, ref, alt, qual, filter, info, format - - def make_dir(path): """ Description: @@ -195,6 +67,14 @@ def make_dir(path): def parse_ivar_line(line): + """ + Description: + Parse ivar line to get needed variables for vcf format. + input: + line - ivar tsv line + return: + CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type + """ line = re.split("\t", line) ## Assign intial fields to variables @@ -249,7 +129,62 @@ def parse_ivar_line(line): ) -def write_vcf_header(ref_len, ignore_strand_bias, file_out, filename): +###################### +## FILTER FUNCTIONS ## +###################### + + +def ivar_filter(pass_test): + """ + Description: + process ivar filter into vcf filter format. + input: + pass_test - ivar fisher exact test [ True, False ] + return: + Whether it passes the filter or not. [False, "ft"] + """ + if pass_test: + return False + else: + return "ft" + + +def strand_bias_filter(format): + """ + Description: + Calculate strand-bias fisher test. + input: + format - format variables + return: + Whether it passes the filter or not. [False, "sb"] + """ + # format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + # table: + ## REF_FW REF_RV + ## ALT_FW ALT_RV + table = np.array([[format[2], format[1]], [format[5], format[4]]]) + oddsr, pvalue = fisher_exact(table, alternative="greater") + + # h0: both strands are equally represented. + # If test is significant h0 is refused so there is an strand bias. + if pvalue < 0.05: + return "sb" + else: + return False + + +def write_vcf_header(ref, ignore_strand_bias, file_out, filename): + """ + Description: + Write vcf header for VCFv4.2 + input: + ref - (optional), ref in fasta format + ignore_strand_bias - if no strand-bias is calculated [True, False] + file_out - output file_in + filename - name of the output file + return: + Nothing. + """ ## Define VCF header header_source = ["##fileformat=VCFv4.2", "##source=iVar"] header_info = ['##INFO='] @@ -282,6 +217,15 @@ def write_vcf_header(ref_len, ignore_strand_bias, file_out, filename): def write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, file_out): + """ + Description: + Format variables into vcf line format and write line to file. + input: + chrom, pos, id, ref, alt, filter, qual, info, format - vcf variables + file_out - file output + return: + Nothing. + """ sample = f'1:{":".join(str(x) for x in format)}' format = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" @@ -312,50 +256,156 @@ def write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, file_ou fout.close() -def ivar_filter(pass_test): - if pass_test: +############################ +## MERGE CODONS FUNCTIONS ## +############################ + + +def check_consecutive(mylist): + """ + Description: + This function checks a list of numbers and returns how many items are consecutive. + input: + my_list - A list of integers + return: + Number of items consecutive in the list - [False, 2, 3,..] 
+ """ + my_list = list(map(int, mylist)) + ## Check if the list contains consecutive numbers + if len(my_list) == 1: return False + elif sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + return len(my_list) else: - return "ft" + ## If not, and the list is > 1, remove the last item and reevaluate. + if len(my_list) > 2: + my_list.pop() + if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + return len(my_list) + else: + return False + return False -def strand_bias_filter(format): - # format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] - # table: - ## REF_FW REF_RV - ## ALT_FW ALT_RV - table = np.array([[format[2], format[1]], [format[5], format[4]]]) - oddsr, pvalue = fisher_exact(table, alternative="greater") +def get_diff_position(seq1, seq2): + """ + Description: + Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs. + Input: + seq1 - string size 3 [A,T,C,G]. Ex. "ATC" + seq2 - string size 3 [A,T,C,G]. Ex. "ACC" + Returns: + Returns position where seq1 != seq2 + """ + if seq1 == "NA": + return False - # h0: both strands are equally represented. If test is significant h0 is refused so there is an strand bias. - if pvalue < 0.05: - return "sb" - else: + ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]] + if len(ind_diff) > 1: + print("There has been an issue, more than one difference between the seqs.") return False + else: + return ind_diff[0] + + +def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): + """ + Description: + Logic for determine if variant lines need to be collapsed into one determining + if they are consecutive and belong to the same codon. + Input: + qpos - list of positions. Ex. [4441, 4442, 4443] + fe_codon_ref - first position codon annotation for ref. Ex. "ATG" + fe_codon_alt - first position codon annotation for alt. Ex. "AGG" + Returns: + Returns num_collapse. Number of lines that need to be collapsed into one. + """ + # Are two positions in the queue consecutive? + # q_pos = [4441, 4442, 5067] + num_collapse = 0 + if check_consecutive(list(q_pos)) == 2: + ## If the first position is not on the third position of the codon they are in the same codon. + if get_diff_position(fe_codon_ref, fe_codon_alt) != 2: + num_collapse = 2 + else: + num_collapse = 1 + # Are the three positions in the queue consecutive? + # q_pos = [4441, 4442, 4443] + elif check_consecutive(list(q_pos)) == 3: + ## we check the first position in which codon position is to process it acordingly. + # If first position is in the first codon position all three positions belong to the same codon. + if get_diff_position(fe_codon_ref, fe_codon_alt) == 0: + num_collapse = 3 + # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. + elif get_diff_position(fe_codon_ref, fe_codon_alt) == 1: + num_collapse = 2 + ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. + elif get_diff_position(fe_codon_ref, fe_codon_alt) == 2: + num_collapse = 1 + # If no consecutive process only one line. + elif check_consecutive(list(q_pos)) == False: + num_collapse = 1 + + return num_collapse + + +def process_variants(variants, num_collapse): + """ + Description: + The function set the variables acordingly to the lines to collapse do to consecutive variants. + Input: + variants - Dict with var lines. 
+ num_collapse - number of lines to collapse [2,3] + Returns:: + Vars fixed: chrom, pos, id, ref, alt, qual, filter, info, format + """ + key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] + chrom, pos, id, qual, filter, info, format = [ + variants[next(iter(variants))][key] for key in key_list + ] + + # If no consecutive, process one variant line + # If two consecutive, process two variant lines into one + # If three consecutive process three variant lines and write one + ref = "" + alt = "" + for i in range(num_collapse): + ref += variants[next(iter(variants))]["ref"] + alt += variants[next(iter(variants))]["alt"] + + return chrom, pos, id, ref, alt, qual, filter, info, format def main(args=None): + # Process args args = parse_args(args) + # Initialize vars filename = os.path.splitext(args.file_in)[0] out_dir = os.path.dirname(args.file_out) + var_list = [] # store variants + var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} # variant counts + variants = OrderedDict() # variant dict (merge codon) + q_pos = deque([], maxlen=3) # pos fifo queue (merge codon) - ## Create output directory + # Create output directory make_dir(out_dir) - # Initialize vars - var_list = [] - var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} - variants = OrderedDict() - q_pos = deque([], maxlen=3) - - ## Write header to file + ########################## + ## Write header to file ## + ########################## write_vcf_header(29990, args.ignore_strand_bias, args.file_out, filename) + ################################# + ## Read and process input file ## + ################################# with open(args.file_in, "r") as fin: for line in fin: if not re.match("REGION", line): - # Parse line + + ################ + ## Parse line ## + ################ ## format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] write_line = True ( @@ -373,7 +423,9 @@ def main(args=None): var_type, ) = parse_ivar_line(line) - # Process filters + ##################### + ## Process filters ## + ##################### ## ivar fisher test filter = "" if ivar_filter(pass_test): @@ -390,7 +442,9 @@ def main(args=None): if not filter: filter = "PASS" - ### Filter variants + ##################### + ## Filter variants ## + ##################### if args.pass_only and filter != "PASS": write_line = False ### AF filtering. 
ALT_DP/(ALT_DP+REF_DP) @@ -405,10 +459,12 @@ def main(args=None): else: var_list.append((chrom, pos, ref, alt)) - ## Merge consecutive variants belonging to the same codon + ############################################################ + ## MERGE_CODONS ## + ## Merge consecutive variants belonging to the same codon ## + ############################################################ if not args.ignore_merge_codons and var_type == "SNP": - - ## re-fill queue accordingly + ## re-fill queue and dict accordingly q_pos.append(pos) variants[(chrom, pos, ref, alt)] = { "chrom": chrom, @@ -425,9 +481,6 @@ def main(args=None): } if len(q_pos) == q_pos.maxlen: - print(q_pos) - print(variants) - print("longitud cola:" + str(len(q_pos))) fe_codon_ref = variants[next(iter(variants))]["ref_codon"] fe_codon_alt = variants[next(iter(variants))]["alt_codon"] num_collapse = check_merge_codons( @@ -452,7 +505,9 @@ def main(args=None): else: write_line = False - ## Write output to vcf file + ############################## + ## Write output to vcf file ## + ############################## if write_line: var_count_dict[var_type] += 1 write_vcf_line( @@ -469,7 +524,9 @@ def main(args=None): ) if not args.ignore_merge_codons: - ## handle last lines + ####################### + ## handle last lines ## + ####################### while len(q_pos) > 0: fe_codon_ref = variants[next(iter(variants))]["ref_codon"] fe_codon_alt = variants[next(iter(variants))]["alt_codon"] @@ -487,10 +544,12 @@ def main(args=None): variants.popitem() q_pos.pop() - ## Print variant counts to pass to MultiQC + ############################################# + ## variant counts to pass to MultiQC ## + ############################################# var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())] - print("\t".join(["sample"] + [x[0] for x in var_count_list])) - print("\t".join([filename] + [x[1] for x in var_count_list])) + ("\t".join(["sample"] + [x[0] for x in var_count_list])) + ("\t".join([filename] + [x[1] for x in var_count_list])) if __name__ == "__main__": From a4067ec177026df7edf1634760edd62b66aff04c Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 18:34:33 +0200 Subject: [PATCH 17/58] fix variants iterator --- bin/ivar_variants_to_vcf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index b965a198..ce33427b 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -359,6 +359,7 @@ def process_variants(variants, num_collapse): Returns:: Vars fixed: chrom, pos, id, ref, alt, qual, filter, info, format """ + # Collapsed variant parameters equal to first variant key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] chrom, pos, id, qual, filter, info, format = [ variants[next(iter(variants))][key] for key in key_list @@ -369,9 +370,11 @@ def process_variants(variants, num_collapse): # If three consecutive process three variant lines and write one ref = "" alt = "" + iter_variants = iter(variants) for i in range(num_collapse): - ref += variants[next(iter(variants))]["ref"] - alt += variants[next(iter(variants))]["alt"] + var = next(iter_variants) + ref += variants[var]["ref"] + alt += variants[var]["alt"] return chrom, pos, id, ref, alt, qual, filter, info, format From a5a0a40bf75fec38246e3f296ebdcf72180dfbd2 Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 18:58:53 +0200 Subject: [PATCH 18/58] fixed bug in format field and last lines handling --- 
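Note for reviewers: FORMAT now carries [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ], so the strand-bias test re-derives the forward depths as DP - RV instead of reading them out of the list. A minimal sketch of the corrected 2x2 contingency table, using made-up counts:

    import numpy as np
    from scipy.stats import fisher_exact

    # FORMAT = [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]
    fmt = [100, 48, 30, 40, 38, 30, 0.29]   # hypothetical variant record
    table = np.array([
        [fmt[0] - fmt[1], fmt[1]],          # REF_FW=52, REF_RV=48 -> balanced
        [fmt[3] - fmt[4], fmt[4]],          # ALT_FW=2,  ALT_RV=38 -> skewed
    ])
    oddsr, pvalue = fisher_exact(table, alternative="greater")
    # Flags "sb" here: ALT support piles up on the reverse strand while REF is balanced.
    print("sb" if pvalue < 0.05 else "no strand bias")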
bin/ivar_variants_to_vcf.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index ce33427b..b4b0b29f 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -84,14 +84,17 @@ def parse_ivar_line(line): REF = line[2] ALT = line[3] - ## REF/ALF depths + ## REF/ALF depths and quals REF_DP = int(line[4]) REF_RV = int(line[5]) REF_FW = REF_DP - REF_RV + REF_QUAL = int(line[6]) ALT_RV = int(line[8]) ALT_DP = int(line[7]) ALT_FW = ALT_DP - ALT_RV - FORMAT = [REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + ALT_QUAL = int(line[9]) + ALT_FREQ = float(line[10]) + FORMAT = [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ] ## Codon annotation REF_CODON = line[15] @@ -143,7 +146,7 @@ def ivar_filter(pass_test): return: Whether it passes the filter or not. [False, "ft"] """ - if pass_test: + if pass_test == "TRUE": return False else: return "ft" @@ -158,11 +161,11 @@ def strand_bias_filter(format): return: Whether it passes the filter or not. [False, "sb"] """ - # format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + # format=[REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ] # table: ## REF_FW REF_RV ## ALT_FW ALT_RV - table = np.array([[format[2], format[1]], [format[5], format[4]]]) + table = np.array([[format[0] - format[1], format[1]], [format[3] - format [4], format[4]]]) oddsr, pvalue = fisher_exact(table, alternative="greater") # h0: both strands are equally represented. @@ -394,9 +397,9 @@ def main(args=None): # Create output directory make_dir(out_dir) - ########################## - ## Write header to file ## - ########################## + ############################## + ## Write vcf header to file ## + ############################## write_vcf_header(29990, args.ignore_strand_bias, args.file_out, filename) ################################# @@ -409,7 +412,8 @@ def main(args=None): ################ ## Parse line ## ################ - ## format=[REF_DP, REF_RV, REF_FW, ALT_DP, ALT_RV, ALT_FW] + ## format= + # [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ] write_line = True ( chrom, @@ -425,7 +429,6 @@ def main(args=None): pass_test, var_type, ) = parse_ivar_line(line) - ##################### ## Process filters ## ##################### @@ -433,7 +436,6 @@ def main(args=None): filter = "" if ivar_filter(pass_test): filter = ivar_filter(pass_test) - print(filter) ## strand-bias fisher test if not args.ignore_strand_bias: if strand_bias_filter(format): @@ -544,8 +546,8 @@ def main(args=None): ) ## Empty variants dict and queue accordingly for i in range(num_collapse): - variants.popitem() - q_pos.pop() + variants.popitem(last=False) + q_pos.popleft() ############################################# ## variant counts to pass to MultiQC ## From 9bff8a5fabcd702bb1fcf15bf0be3fdf1767427a Mon Sep 17 00:00:00 2001 From: saramonzon Date: Wed, 13 Apr 2022 19:05:00 +0200 Subject: [PATCH 19/58] minor fixed comment --- bin/ivar_variants_to_vcf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index b4b0b29f..4bb9ca5b 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -550,7 +550,7 @@ def main(args=None): q_pos.popleft() ############################################# - ## variant counts to pass to MultiQC ## + ## variant counts to pass to MultiQC ## ############################################# var_count_list = [(k, str(v)) for k, v in 
sorted(var_count_dict.items())] ("\t".join(["sample"] + [x[0] for x in var_count_list])) From 62d33025699cdf389a6c68e8c9e8d2a64285f2e3 Mon Sep 17 00:00:00 2001 From: saramonzon Date: Thu, 14 Apr 2022 12:28:04 +0200 Subject: [PATCH 20/58] updated changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a8d65c7c..839f00f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes - Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2) +- Re-factor code of `ivar_variants_to_vcf` script. ### Parameters From 5d0d14b9f575d384fafab0786401a7947209d2d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 14:19:17 +0200 Subject: [PATCH 21/58] added fasta arg and contig field to vcf header --- bin/ivar_variants_to_vcf.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 4bb9ca5b..7f63e510 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -9,9 +9,11 @@ from collections import deque import numpy as np +from Bio import SeqIO from scipy.stats import fisher_exact + def parse_args(args=None): Description = "Convert iVar variants TSV file to VCF format." Epilog = """Example usage: python ivar_variants_to_vcf.py """ @@ -45,7 +47,13 @@ def parse_args(args=None): help="Output variants without taking into account if consecutive positions belong to the same codon.", action="store_true", ) - + parser.add_argument( + "-f", + "--fasta", + type=str, + default=None, + help="Fasta file used in mapping and variant calling for vcf header reference genome lenght info.", + ) return parser.parse_args(args) @@ -190,6 +198,13 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): """ ## Define VCF header header_source = ["##fileformat=VCFv4.2", "##source=iVar"] + if ref: + header_contig = [] + for record in SeqIO.parse(ref, "fasta"): + header_contig += ["##contig="] + + header_source += header_contig + header_info = ['##INFO='] header_filter = [ '##FILTER=', @@ -213,6 +228,7 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): header_filter += [ '##FILTER=' ] + header = header_source + header_info + header_filter + header_format + header_cols fout = open(file_out, "w") fout.write("\n".join(header) + "\n") @@ -400,7 +416,7 @@ def main(args=None): ############################## ## Write vcf header to file ## ############################## - write_vcf_header(29990, args.ignore_strand_bias, args.file_out, filename) + write_vcf_header(args.fasta, args.ignore_strand_bias, args.file_out, filename) ################################# ## Read and process input file ## From 70dceae22300157108b94c35127bc38c71257a31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 14:28:38 +0200 Subject: [PATCH 22/58] fixed filter field --- bin/ivar_variants_to_vcf.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index 7f63e510..b7936304 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -222,9 +222,6 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): ] header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}"] if not ignore_strand_bias: - header_info += [ - '##INFO=' - 
] header_filter += [ '##FILTER=' ] @@ -456,7 +453,7 @@ def main(args=None): if not args.ignore_strand_bias: if strand_bias_filter(format): if filter: - filter += "," + strand_bias_filter(format) + filter += ";" + strand_bias_filter(format) else: filter = strand_bias_filter(format) From 68a05d4612575726524deaa0c0947f639996b888 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 14:51:37 +0200 Subject: [PATCH 23/58] installed bcftools sort module --- modules.json | 5 ++- modules/nf-core/modules/bcftools/sort/main.nf | 35 +++++++++++++++ .../nf-core/modules/bcftools/sort/meta.yml | 43 +++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 modules/nf-core/modules/bcftools/sort/main.nf create mode 100644 modules/nf-core/modules/bcftools/sort/meta.yml diff --git a/modules.json b/modules.json index adf96289..398b7a6b 100644 --- a/modules.json +++ b/modules.json @@ -30,6 +30,9 @@ "bcftools/query": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, + "bcftools/sort": { + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, "bcftools/stats": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, @@ -158,4 +161,4 @@ } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf new file mode 100644 index 00000000..8478fe25 --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/main.nf @@ -0,0 +1,35 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0': + 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bcftools \\ + sort \\ + --output ${prefix}.vcf.gz \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/modules/bcftools/sort/meta.yml new file mode 100644 index 00000000..0c244a48 --- /dev/null +++ b/modules/nf-core/modules/bcftools/sort/meta.yml @@ -0,0 +1,43 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + +authors: + - "@Gwennid" From 04fceef79a30aa476512c4136abbb676cda964d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 14:57:15 +0200 Subject: [PATCH 24/58] added bcftools sort step in ivar_variants subworkflow --- subworkflows/local/variants_ivar.nf | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 558cc560..2745ea79 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -4,6 +4,7 @@ include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' +include { BCFTOOLS_SORT } from '../nf-core/bcftools/sort/main' include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' include { VARIANTS_QC } from './variants_qc' @@ -51,9 +52,14 @@ workflow VARIANTS_IVAR { ) ch_versions = ch_versions.mix(IVAR_VARIANTS_TO_VCF.out.versions.first()) - VCF_BGZIP_TABIX_STATS ( + BCFTOOLS_SORT ( IVAR_VARIANTS_TO_VCF.out.vcf ) + ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) + + VCF_BGZIP_TABIX_STATS ( + BCFTOOLS_SORT.out.vcf + ) ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) // From eb9cf95d0acb942a8b511dd71b4ec3c6f6cfc7c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 14:57:55 +0200 Subject: [PATCH 25/58] removed sorting with awk in ivar_variants_to_vcf module --- modules/local/ivar_variants_to_vcf.nf | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index 825d87ff..dd5da602 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -29,9 +29,6 @@ process IVAR_VARIANTS_TO_VCF { $args \\ > ${prefix}.variant_counts.log - ## Order vcf by coordinates - cat unsorted.txt | grep "^#" > ${prefix}.vcf; cat unsorted.txt | grep -v "^#" | sort -k1,1d -k2,2n >> ${prefix}.vcf - cat $header ${prefix}.variant_counts.log > ${prefix}.variant_counts_mqc.tsv cat <<-END_VERSIONS > versions.yml From e5e8aa03cf658c3b647e90a0c064cac8581c3513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 15:37:05 +0200 Subject: [PATCH 26/58] fixed import paths in modules --- subworkflows/local/variants_ivar.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 2745ea79..14398745 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -4,7 +4,7 @@ include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' -include { BCFTOOLS_SORT } from '../nf-core/bcftools/sort/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/modules/bcftools/sort/main' include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' include { VARIANTS_QC } from './variants_qc' From 4ccd89d4005f19a14249b8879b4874234d9e0943 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 16:29:44 +0200 Subject: [PATCH 27/58] add fasta as input for ivar_variants_to_vcf module --- 
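Note for reviewers: the module now takes the reference FASTA so the script can emit one ##contig header line per reference sequence. A sketch of the derivation, assuming the contig line carries the conventional ID and length attributes (the exact attribute text lives in bin/ivar_variants_to_vcf.py and was mangled by tag-stripping above):

    from Bio import SeqIO

    def contig_header_lines(fasta):
        # e.g. ["##contig=<ID=MN908947.3,length=29903>"] for the SARS-CoV-2 reference
        return [
            f"##contig=<ID={record.id},length={len(record.seq)}>"
            for record in SeqIO.parse(fasta, "fasta")
        ]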
modules/local/ivar_variants_to_vcf.nf | 4 +++- subworkflows/local/variants_ivar.nf | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index dd5da602..22f90755 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -8,6 +8,7 @@ process IVAR_VARIANTS_TO_VCF { input: tuple val(meta), path(tsv) + path fasta path header output: @@ -25,7 +26,8 @@ process IVAR_VARIANTS_TO_VCF { """ ivar_variants_to_vcf.py \\ $tsv \\ - unsorted.txt \\ + ${prefix}.vcf \\ + $fasta \\ $args \\ > ${prefix}.variant_counts.log diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 14398745..698804ed 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -48,6 +48,7 @@ workflow VARIANTS_IVAR { // IVAR_VARIANTS_TO_VCF ( ch_ivar_tsv, + fasta, ivar_multiqc_header ) ch_versions = ch_versions.mix(IVAR_VARIANTS_TO_VCF.out.versions.first()) From 91ec8abce7fd9654bf8659c7a14cf2620cf5b06f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Tue, 19 Apr 2022 16:59:54 +0200 Subject: [PATCH 28/58] black lint --- bin/ivar_variants_to_vcf.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py index b7936304..f00dcaf0 100755 --- a/bin/ivar_variants_to_vcf.py +++ b/bin/ivar_variants_to_vcf.py @@ -13,7 +13,6 @@ from scipy.stats import fisher_exact - def parse_args(args=None): Description = "Convert iVar variants TSV file to VCF format." Epilog = """Example usage: python ivar_variants_to_vcf.py """ @@ -173,7 +172,9 @@ def strand_bias_filter(format): # table: ## REF_FW REF_RV ## ALT_FW ALT_RV - table = np.array([[format[0] - format[1], format[1]], [format[3] - format [4], format[4]]]) + table = np.array( + [[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]] + ) oddsr, pvalue = fisher_exact(table, alternative="greater") # h0: both strands are equally represented. @@ -201,7 +202,9 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename): if ref: header_contig = [] for record in SeqIO.parse(ref, "fasta"): - header_contig += ["##contig="] + header_contig += [ + "##contig=" + ] header_source += header_contig From aa22edf5f23f4e93162a72d55a22736c249ec0f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Fri, 22 Apr 2022 10:28:44 +0200 Subject: [PATCH 29/58] updated mulled container for ivar_variants_to_vcf module --- modules/local/ivar_variants_to_vcf.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index 22f90755..c78fd0f7 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -3,8 +3,8 @@ process IVAR_VARIANTS_TO_VCF { conda (params.enable_conda ? "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' : - 'quay.io/biocontainers/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' : + 'quay.io/biocontainers/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' }" input: tuple val(meta), path(tsv) From 9136bd9dac30e8bc085c936ea2a6d6fa1d4738f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Fri, 22 Apr 2022 14:32:17 +0200 Subject: [PATCH 30/58] fixed bcftools sort in variants_ivar subworkflow and config --- conf/modules_illumina.config | 2 +- modules/local/ivar_variants_to_vcf.nf | 2 +- subworkflows/local/variants_ivar.nf | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 0edea4dd..d5ac117a 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -388,7 +388,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_BGZIP' { + withName: '.*:.*:VARIANTS_IVAR:BCFTOOLS_SORT' { publishDir = [ path: { "${params.outdir}/variants/ivar" }, mode: params.publish_dir_mode, diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index c78fd0f7..2daf91eb 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -27,7 +27,7 @@ process IVAR_VARIANTS_TO_VCF { ivar_variants_to_vcf.py \\ $tsv \\ ${prefix}.vcf \\ - $fasta \\ + --fasta $fasta \\ $args \\ > ${prefix}.variant_counts.log diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 698804ed..8ad43c54 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -5,7 +5,7 @@ include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' include { BCFTOOLS_SORT } from '../../modules/nf-core/modules/bcftools/sort/main' -include { VCF_BGZIP_TABIX_STATS } from '../nf-core/vcf_bgzip_tabix_stats' +include { VCF_TABIX_STATS } from '../nf-core/vcf_tabix_stats' include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_IVAR { @@ -58,18 +58,18 @@ workflow VARIANTS_IVAR { ) ch_versions = ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) - VCF_BGZIP_TABIX_STATS ( + VCF_TABIX_STATS ( BCFTOOLS_SORT.out.vcf ) - ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) + ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) // // Run downstream tools for variants QC // VARIANTS_QC ( bam, - VCF_BGZIP_TABIX_STATS.out.vcf, - VCF_BGZIP_TABIX_STATS.out.stats, + BCFTOOLS_SORT.out.vcf, + VCF_TABIX_STATS.out.stats, fasta, sizes, gff, @@ -86,9 +86,9 @@ workflow VARIANTS_IVAR { log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = 
VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] From 9eb97c47a59d881a29c5d4ef6cb9035b5f76ad1d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 25 Apr 2022 11:52:07 +0100 Subject: [PATCH 31/58] Replace vanilla Linux Biocontainer with Ubuntu --- modules.json | 6 +++--- modules/local/filter_blastn.nf | 4 ++-- modules/local/rename_fasta_header.nf | 4 ++-- modules/nf-core/modules/cat/fastq/main.nf | 4 ++-- modules/nf-core/modules/gunzip/main.nf | 4 ++-- modules/nf-core/modules/untar/main.nf | 25 ++++++++++++++++------- modules/nf-core/modules/untar/meta.yml | 10 +++++++++ 7 files changed, 39 insertions(+), 18 deletions(-) diff --git a/modules.json b/modules.json index adf96289..46a0844f 100644 --- a/modules.json +++ b/modules.json @@ -55,7 +55,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "cat/fastq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, "custom/dumpsoftwareversions": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -70,7 +70,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "gunzip": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, "ivar/consensus": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -151,7 +151,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "untar": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, "vcflib/vcfuniq": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf index 4173cdb6..5e5ed81b 100644 --- a/modules/local/filter_blastn.nf +++ b/modules/local/filter_blastn.nf @@ -4,8 +4,8 @@ process FILTER_BLASTN { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(hits) diff --git a/modules/local/rename_fasta_header.nf b/modules/local/rename_fasta_header.nf index 7d9c9901..36810983 100644 --- a/modules/local/rename_fasta_header.nf +++ b/modules/local/rename_fasta_header.nf @@ -3,8 +3,8 @@ process RENAME_FASTA_HEADER { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/modules/cat/fastq/main.nf index bf0877c3..b6854895 100644 --- a/modules/nf-core/modules/cat/fastq/main.nf +++ b/modules/nf-core/modules/cat/fastq/main.nf @@ -4,8 +4,8 @@ process CAT_FASTQ { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(reads, stageAs: "input*/*") diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 9d4b0666..61bf1afa 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -4,8 +4,8 @@ process GUNZIP { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: tuple val(meta), path(archive) diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index 01205e60..058d1764 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -4,23 +4,23 @@ process UNTAR { conda (params.enable_conda ? "conda-forge::sed=4.7" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://containers.biocontainers.pro/s3/SingImgsRepo/biocontainers/v1.2.0_cv1/biocontainers_v1.2.0_cv1.img' : - 'biocontainers/biocontainers:v1.2.0_cv1' }" + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" input: - path archive + tuple val(meta), path(archive) output: - path "$untar" , emit: untar - path "versions.yml", emit: versions + tuple val(meta), path("$untar"), emit: untar + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' + def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' - untar = archive.toString() - '.tar.gz' + untar = archive.toString() - '.tar.gz' """ tar \\ -xzvf \\ @@ -33,4 +33,15 @@ process UNTAR { untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + untar = archive.toString() - '.tar.gz' + """ + touch $untar + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml index e877a97c..d426919b 100644 --- a/modules/nf-core/modules/untar/meta.yml +++ b/modules/nf-core/modules/untar/meta.yml @@ -10,11 +10,21 @@ tools: documentation: https://www.gnu.org/software/tar/manual/ licence: ["GPL-3.0-or-later"] input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] - archive: type: file description: File to be untar pattern: "*.{tar}.{gz}" output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] - untar: type: file description: From 870bd0170a84dc28439520716aac51770b0bb506 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 25 Apr 2022 12:04:03 +0100 Subject: [PATCH 32/58] Replace vanilla Linux Biocontainer with Ubuntu --- CHANGELOG.md | 1 + subworkflows/local/prepare_genome_illumina.nf | 16 ++++++++-------- subworkflows/local/prepare_genome_nanopore.nf | 4 ++-- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 839f00f8..50233bbb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer - Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2) - Re-factor code of `ivar_variants_to_vcf` script. diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf index 1a9a54c5..70d1e53e 100644 --- a/subworkflows/local/prepare_genome_illumina.nf +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -76,9 +76,9 @@ workflow PREPARE_GENOME { if (params.kraken2_db) { if (params.kraken2_db.endsWith('.tar.gz')) { UNTAR_KRAKEN2_DB ( - params.kraken2_db + [ [:], params.kraken2_db ] ) - ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) } else { ch_kraken2_db = file(params.kraken2_db) @@ -151,9 +151,9 @@ workflow PREPARE_GENOME { if (params.bowtie2_index) { if (params.bowtie2_index.endsWith('.tar.gz')) { UNTAR_BOWTIE2_INDEX ( - params.bowtie2_index + [ [:], params.bowtie2_index ] ) - ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) } else { ch_bowtie2_index = file(params.bowtie2_index) @@ -175,9 +175,9 @@ workflow PREPARE_GENOME { if (params.nextclade_dataset) { if (params.nextclade_dataset.endsWith('.tar.gz')) { UNTAR_NEXTCLADE_DB ( - params.nextclade_dataset + [ [:], params.nextclade_dataset ] ) - ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar + ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_NEXTCLADE_DB.out.versions) } else { ch_nextclade_db = file(params.nextclade_dataset) @@ -202,9 +202,9 @@ workflow PREPARE_GENOME { if (params.blast_db) { if (params.blast_db.endsWith('.tar.gz')) { UNTAR_BLAST_DB ( - params.blast_db + [ [:], params.blast_db ] ) - ch_blast_db = UNTAR_BLAST_DB.out.untar + ch_blast_db = UNTAR_BLAST_DB.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR_BLAST_DB.out.versions) } else { ch_blast_db = file(params.blast_db) diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf index 6d2449d7..208e582b 100644 --- a/subworkflows/local/prepare_genome_nanopore.nf +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -94,9 +94,9 @@ workflow PREPARE_GENOME { if (params.nextclade_dataset) { if (params.nextclade_dataset.endsWith('.tar.gz')) { UNTAR ( - params.nextclade_dataset + [ [:], params.nextclade_dataset ] ) - ch_nextclade_db = UNTAR.out.untar + ch_nextclade_db = UNTAR.out.untar.map { it[1] } ch_versions = ch_versions.mix(UNTAR.out.versions) } else { ch_nextclade_db = file(params.nextclade_dataset) From 
423a840191da0543058c29596943b7ddf1309c3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sara=20Monz=C3=B3n?=
Date: Tue, 26 Apr 2022 10:49:41 +0200
Subject: [PATCH 33/58] Updated changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 50233bbb..0cab4be8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 - [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer
 - Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2)
-- Re-factor code of `ivar_variants_to_vcf` script.
+- [[#304](https://github.com/nf-core/viralrecon/pull/304)] Re-factor code of `ivar_variants_to_vcf` script.
+- [[#308](https://github.com/nf-core/viralrecon/pull/308)] Added contig tag to vcf in `ivar_variants_to_vcf` script and bcftools sort module for vcf sorting.
 
 ### Parameters

From 46e19287c32920dcfd16ceb115aca14baf06d2f0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sara=20Monz=C3=B3n?=
Date: Tue, 26 Apr 2022 11:14:08 +0200
Subject: [PATCH 34/58] prettier lint modules.json

---
 modules.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules.json b/modules.json
index e6eb67c3..436465ce 100644
--- a/modules.json
+++ b/modules.json
@@ -161,4 +161,4 @@
     }
   }
 }
-}
\ No newline at end of file
+}

From eba9969b8f6831bb5accd808e449b10051b24817 Mon Sep 17 00:00:00 2001
From: nf-core-bot
Date: Mon, 16 May 2022 10:22:16 +0000
Subject: [PATCH 35/58] Template update for nf-core/tools version 2.4

---
 .github/workflows/awsfulltest.yml     |   3 -
 .github/workflows/awstest.yml         |   3 -
 .github/workflows/branch.yml          |   3 +-
 .github/workflows/ci.yml              |   2 -
 .github/workflows/fix-linting.yml     |  55 ++++++++++
 .github/workflows/linting.yml         |   4 +-
 .github/workflows/linting_comment.yml |   1 -
 .prettierignore                       |   9 ++
 README.md                             |  25 ++---
 assets/email_template.html            | 142 ++++++++------------------
 bin/check_samplesheet.py              |  16 ++-
 nextflow.config                       |   2 +-
 12 files changed, 135 insertions(+), 130 deletions(-)
 create mode 100644 .github/workflows/fix-linting.yml
 create mode 100644 .prettierignore

diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml
index 4881dc1b..790fcdae 100644
--- a/.github/workflows/awsfulltest.yml
+++ b/.github/workflows/awsfulltest.yml
@@ -28,6 +28,3 @@ jobs:
             "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}"
           }
           profiles: test_full,aws_tower
-          nextflow_config: |
-            process.errorStrategy = 'retry'
-            process.maxRetries = 3
diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml
index f9631c28..a3ce4a0a 100644
--- a/.github/workflows/awstest.yml
+++ b/.github/workflows/awstest.yml
@@ -23,6 +23,3 @@ jobs:
             "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-test-${{ github.sha }}"
           }
           profiles: test,aws_tower
-          nextflow_config: |
-            process.errorStrategy = 'retry'
-            process.maxRetries = 3
diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml
index 3d59d488..c585d9fe 100644
--- a/.github/workflows/branch.yml
+++ b/.github/workflows/branch.yml
@@ -13,7 +13,7 @@ jobs:
       - name: Check PRs
         if: github.repository == 'nf-core/viralrecon'
         run: |
-          "{ [[
${{github.event.pull_request.head.repo.full_name }} == nf-core/viralrecon ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] # If the above check failed, post a comment on the PR explaining the failure # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets @@ -42,4 +42,3 @@ jobs: Thanks again for your contribution! repo-token: ${{ secrets.GITHUB_TOKEN }} allow-repeats: false -# diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5c5f501..c37bf69f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -48,5 +48,3 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results - -# diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 00000000..d1fb62d0 --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/viralrecon' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "::set-output name=result::pass" + else + echo "::set-output name=result::fail" + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . 
+ git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index e9cf5de3..77358dee 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -48,7 +48,7 @@ jobs: wget -qO- get.nextflow.io | bash sudo mv nextflow /usr/local/bin/ - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v3 with: python-version: "3.6" architecture: "x64" @@ -78,5 +78,3 @@ jobs: lint_log.txt lint_results.md PR_number.txt - -# diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 91c487a1..04758f61 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -26,4 +26,3 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} path: linting-logs/lint_results.md -# diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000..d0e7ae58 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,9 @@ +email_template.html +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/README.md b/README.md index fbb6b91e..36e77cd0 100644 --- a/README.md +++ b/README.md @@ -1,18 +1,19 @@ -# ![nf-core/viralrecon](docs/images/nf-core/viralrecon_logo_light.png#gh-light-mode-only) ![nf-core/viralrecon](docs/images/nf-core/viralrecon_logo_dark.png#gh-dark-mode-only) +# ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo_light.png#gh-light-mode-only) ![nf-core/viralrecon](docs/images/nf-core-viralrecon_logo_dark.png#gh-dark-mode-only) [![GitHub Actions CI Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+CI%22) [![GitHub Actions Linting Status](https://github.com/nf-core/viralrecon/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/viralrecon/actions?query=workflow%3A%22nf-core+linting%22) -[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) -[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX) - -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg?labelColor=000000)](https://www.nextflow.io/) -[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) -[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) -[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) - -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/viralrecon) -[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core) -[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/viralrecon/results) +[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8)](https://doi.org/10.5281/zenodo.XXXXXXX) + 
+[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/)
+[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?logo=anaconda)](https://docs.conda.io/en/latest/)
+[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?logo=docker)](https://www.docker.com/)
+[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg)](https://sylabs.io/docs/)
+[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/viralrecon)
+
+[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23viralrecon-4A154B?logo=slack)](https://nfcore.slack.com/channels/viralrecon)
+[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?logo=twitter)](https://twitter.com/nf_core)
+[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?logo=youtube)](https://www.youtube.com/c/nf-core)
 
 ## Introduction
 
diff --git a/assets/email_template.html b/assets/email_template.html
index f08c71a8..523e9a8f 100644
--- a/assets/email_template.html
+++ b/assets/email_template.html
@@ -1,111 +1,53 @@
[hunk omitted: the HTML markup of this template was lost during extraction. The change reformats assets/email_template.html from 111 lines to 53 (Prettier style) while keeping the same report content: the "nf-core/viralrecon Pipeline Report" title, the "nf-core/viralrecon v${version}" heading, "Run Name: $runName", the success/failure notices (with $exitStatus and ${errorReport} on failure), the completion time ($dateComplete, duration $duration), the launch $commandLine, the "Pipeline Configuration" summary table, and the closing links to https://github.com/nf-core/viralrecon.]
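Of note in the bin/check_samplesheet.py hunk just below: csv.Sniffer previously peeked at a fixed handle.read(2048), which can cut the sample sheet mid-row and confuse dialect detection; the update feeds it whole lines via read_head() and rewinds before sniffing rather than after. A minimal usage sketch (standard library only; the file name is hypothetical):

    import csv

    def read_head(handle, num_lines=10):
        # Read up to num_lines complete lines from the current position.
        lines = []
        for idx, line in enumerate(handle):
            if idx == num_lines:
                break
            lines.append(line)
        return "".join(lines)

    with open("samplesheet.csv") as handle:
        peek = read_head(handle)   # whole lines, never a partial row
        handle.seek(0)             # rewind before the real parse
        dialect = csv.Sniffer().sniff(peek)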
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 5473b624..3652c63c 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -98,7 +98,7 @@ def _validate_pair(self, row):
         if row[self._first_col] and row[self._second_col]:
             row[self._single_col] = False
             assert (
-                Path(row[self._first_col]).suffixes == Path(row[self._second_col]).suffixes
+                Path(row[self._first_col]).suffixes[-2:] == Path(row[self._second_col]).suffixes[-2:]
             ), "FASTQ pairs must have the same file extensions."
         else:
             row[self._single_col] = True
@@ -129,6 +129,16 @@ def validate_unique_samples(self):
                 row[self._sample_col] = f"{sample}_T{seen[sample]}"
 
 
+def read_head(handle, num_lines=10):
+    """Read the specified number of lines from the current position in the file."""
+    lines = []
+    for idx, line in enumerate(handle):
+        if idx == num_lines:
+            break
+        lines.append(line)
+    return "".join(lines)
+
+
 def sniff_format(handle):
     """
     Detect the tabular format.
@@ -144,13 +154,13 @@ def sniff_format(handle):
         https://docs.python.org/3/glossary.html#term-text-file
 
     """
-    peek = handle.read(2048)
+    peek = read_head(handle)
+    handle.seek(0)
     sniffer = csv.Sniffer()
     if not sniffer.has_header(peek):
         logger.critical(f"The given sample sheet does not appear to contain a header.")
         sys.exit(1)
     dialect = sniffer.sniff(peek)
-    handle.seek(0)
     return dialect
 
diff --git a/nextflow.config b/nextflow.config
index 06b06c45..b568ed02 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -159,7 +159,7 @@ trace {
 }
 dag {
     enabled = true
-    file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.svg"
+    file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
 }
 
 manifest {

From 2919d303c444d5e1d85556b3b52d9289705bfc96 Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Thu, 7 Jul 2022 11:23:10 +0100
Subject: [PATCH 36/58] Follow up fix for PR #308

---
 CHANGELOG.md                          | 4 ++--
 modules/local/ivar_variants_to_vcf.nf | 2 +-
 subworkflows/local/variants_ivar.nf   | 8 ++++----
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0cab4be8..25148ab4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,10 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ### Enhancements & fixes
 
-- [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer
-- Updated pipeline template to [nf-core/tools 2.3.2](https://github.com/nf-core/tools/releases/tag/2.3.2)
 - [[#304](https://github.com/nf-core/viralrecon/pull/304)] Re-factor code of `ivar_variants_to_vcf` script.
 - [[#308](https://github.com/nf-core/viralrecon/pull/308)] Added contig tag to vcf in `ivar_variants_to_vcf` script and bcftools sort module for vcf sorting.
+- [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer +- Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) ### Parameters diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf index 2daf91eb..cd220b24 100644 --- a/modules/local/ivar_variants_to_vcf.nf +++ b/modules/local/ivar_variants_to_vcf.nf @@ -9,7 +9,7 @@ process IVAR_VARIANTS_TO_VCF { input: tuple val(meta), path(tsv) path fasta - path header + path header output: tuple val(meta), path("*.vcf"), emit: vcf diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf index 8ad43c54..fee5c54c 100644 --- a/subworkflows/local/variants_ivar.nf +++ b/subworkflows/local/variants_ivar.nf @@ -5,7 +5,7 @@ include { IVAR_VARIANTS } from '../../modules/nf-core/modules/ivar/variants/main' include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' include { BCFTOOLS_SORT } from '../../modules/nf-core/modules/bcftools/sort/main' -include { VCF_TABIX_STATS } from '../nf-core/vcf_tabix_stats' +include { VCF_TABIX_STATS } from '../nf-core/vcf_tabix_stats' include { VARIANTS_QC } from './variants_qc' workflow VARIANTS_IVAR { @@ -86,9 +86,9 @@ workflow VARIANTS_IVAR { log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] - vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), [ vcf ] ] - tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] - stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] From 5c07e17d8accfc0cd9a32d33a53c16cf88bb13ef Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 7 Jul 2022 11:38:45 +0100 Subject: [PATCH 37/58] Run nf-core modules update --- modules.json | 42 +++---- modules/nf-core/modules/bcftools/norm/main.nf | 13 ++- .../nf-core/modules/bcftools/norm/meta.yml | 8 +- modules/nf-core/modules/bowtie2/align/main.nf | 104 +++++++++--------- .../nf-core/modules/bowtie2/align/meta.yml | 11 ++ .../modules/custom/getchromsizes/main.nf | 6 +- modules/nf-core/modules/fastp/main.nf | 14 +-- modules/nf-core/modules/fastp/meta.yml | 8 +- modules/nf-core/modules/fastqc/main.nf | 12 ++ modules/nf-core/modules/gunzip/main.nf | 10 ++ .../nf-core/modules/kraken2/kraken2/main.nf | 23 ++-- .../nf-core/modules/kraken2/kraken2/meta.yml | 25 ++++- modules/nf-core/modules/mosdepth/main.nf | 45 +++++--- modules/nf-core/modules/mosdepth/meta.yml | 29 ++++- .../picard/collectmultiplemetrics/main.nf | 34 +++++- .../picard/collectmultiplemetrics/meta.yml | 4 + .../modules/picard/markduplicates/main.nf | 25 ++++- .../nf-core/modules/samtools/flagstat/main.nf | 9 +- .../nf-core/modules/samtools/idxstats/main.nf | 10 +- .../nf-core/modules/samtools/index/main.nf | 18 ++- .../nf-core/modules/samtools/mpileup/main.nf | 17 +-- .../nf-core/modules/samtools/mpileup/meta.yml | 6 +- modules/nf-core/modules/samtools/sort/main.nf | 17 ++- .../nf-core/modules/samtools/stats/main.nf | 24 +++- modules/nf-core/modules/samtools/view/main.nf | 20 +++- 
.../nf-core/modules/samtools/view/meta.yml | 4 + modules/nf-core/modules/tabix/bgzip/main.nf | 11 +- modules/nf-core/modules/tabix/bgzip/meta.yml | 15 +-- modules/nf-core/modules/tabix/tabix/main.nf | 14 ++- modules/nf-core/modules/tabix/tabix/meta.yml | 4 + modules/nf-core/modules/untar/main.nf | 8 +- .../nf-core/modules/vcflib/vcfuniq/main.nf | 4 +- 32 files changed, 413 insertions(+), 181 deletions(-) diff --git a/modules.json b/modules.json index 436465ce..b2c9e08f 100644 --- a/modules.json +++ b/modules.json @@ -25,7 +25,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bcftools/norm": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "d596571a03161ee9992b0720c05084cb2ddef324" }, "bcftools/query": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -52,7 +52,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bowtie2/align": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" }, "bowtie2/build": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -64,16 +64,16 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "custom/getchromsizes": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" }, "fastp": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74" }, "fastqc": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" }, "gunzip": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6" }, "ivar/consensus": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -85,13 +85,13 @@ "git_sha": "cab399507bea60d90de6d7b296163210c371b693" }, "kraken2/kraken2": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "abe025677cdd805cc93032341ab19885473c1a07" }, "minia": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "mosdepth": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098" }, "nanoplot": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -106,10 +106,10 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "picard/collectmultiplemetrics": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "43d05d5482de19c5d316a1a92ea6045cbbee6b96" }, "picard/markduplicates": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "63db63757cab03cfa7a02c0d0f134b66fbfadea6" }, "plasmidid": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -121,43 +121,43 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "samtools/flagstat": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/idxstats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/index": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/mpileup": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "24e05f6097a5dde57dd80d33295ed120f1b81aef" }, "samtools/sort": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, "samtools/stats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": 
"ecece498f10b47b7c9d06f53a310cea5811b4c5f" }, "samtools/view": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" }, "spades": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "tabix/bgzip": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" }, "tabix/tabix": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484" }, "unicycler": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "untar": { - "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" + "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" }, "vcflib/vcfuniq": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" } } } diff --git a/modules/nf-core/modules/bcftools/norm/main.nf b/modules/nf-core/modules/bcftools/norm/main.nf index cd681f21..c48aa9bf 100644 --- a/modules/nf-core/modules/bcftools/norm/main.nf +++ b/modules/nf-core/modules/bcftools/norm/main.nf @@ -8,7 +8,7 @@ process BCFTOOLS_NORM { 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" input: - tuple val(meta), path(vcf) + tuple val(meta), path(vcf), path(tbi) path(fasta) output: @@ -34,4 +34,15 @@ process BCFTOOLS_NORM { bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bcftools/norm/meta.yml b/modules/nf-core/modules/bcftools/norm/meta.yml index ce4aee85..2b3c8eae 100644 --- a/modules/nf-core/modules/bcftools/norm/meta.yml +++ b/modules/nf-core/modules/bcftools/norm/meta.yml @@ -24,6 +24,12 @@ input: description: | The vcf file to be normalized e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" - fasta: type: file description: FASTA reference file @@ -37,7 +43,7 @@ output: - vcf: type: file description: VCF normalized output file - pattern: "*.{vcf.gz}" + pattern: "*.vcf.gz" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf index 7e8a9659..c74e376f 100644 --- a/modules/nf-core/modules/bowtie2/align/main.nf +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -1,77 +1,71 @@ process BOWTIE2_ALIGN { tag "$meta.id" - label 'process_high' + label "process_high" - conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4 bioconda::samtools=1.14 conda-forge::pigz=2.6' : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' : - 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:4d235f41348a00533f18e47c9669f1ecb327f629-0' }" + conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) + container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? 
+ "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : + "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" input: tuple val(meta), path(reads) path index val save_unaligned + val sort_bam output: - tuple val(meta), path('*.bam') , emit: bam - tuple val(meta), path('*.log') , emit: log - tuple val(meta), path('*fastq.gz'), emit: fastq, optional:true + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - def unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -U $reads \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" } else { - def unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : '' - """ - INDEX=`find -L ./ -name "*.rev.1.bt2" | sed 's/.rev.1.bt2//'` - bowtie2 \\ - -x \$INDEX \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - --threads $task.cpus \\ - $unaligned \\ - $args \\ - 2> ${prefix}.bowtie2.log \\ - | samtools view -@ $task.cpus $args2 -bhS -o ${prefix}.bam - + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } - if [ -f ${prefix}.unmapped.fastq.1.gz ]; then - mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz - fi - if [ -f ${prefix}.unmapped.fastq.2.gz ]; then - mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz - fi + def samtools_command = sort_bam ? 
'sort' : 'view' - cat <<-END_VERSIONS > versions.yml - "${task.process}": - bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS - """ - } + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml index f80421ec..42ba0f96 100644 --- a/modules/nf-core/modules/bowtie2/align/meta.yml +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -2,7 +2,9 @@ name: bowtie2_align description: Align reads to a reference genome using bowtie2 keywords: - align + - map - fasta + - fastq - genome - reference tools: @@ -29,6 +31,15 @@ input: type: file description: Bowtie2 genome index files pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" output: - bam: type: file diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf index 39da7d34..0eabf3a4 100644 --- a/modules/nf-core/modules/custom/getchromsizes/main.nf +++ b/modules/nf-core/modules/custom/getchromsizes/main.nf @@ -2,10 +2,10 @@ process CUSTOM_GETCHROMSIZES { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: path fasta diff --git a/modules/nf-core/modules/fastp/main.nf b/modules/nf-core/modules/fastp/main.nf index d8218350..120392c5 100644 --- a/modules/nf-core/modules/fastp/main.nf +++ b/modules/nf-core/modules/fastp/main.nf @@ -13,7 +13,7 @@ process FASTP { val save_merged output: - tuple val(meta), path('*.trim.fastq.gz') , emit: reads + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads tuple val(meta), path('*.json') , emit: json tuple val(meta), path('*.html') , emit: html tuple val(meta), path('*.log') , emit: log @@ -31,10 +31,10 @@ process FASTP { if (meta.single_end) { def fail_fastq = save_trimmed_fail ? "--failed_out ${prefix}.fail.fastq.gz" : '' """ - [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz fastp \\ --in1 ${prefix}.fastq.gz \\ - --out1 ${prefix}.trim.fastq.gz \\ + --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ @@ -50,13 +50,13 @@ process FASTP { def fail_fastq = save_trimmed_fail ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ - [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz - [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.trim.fastq.gz \\ - --out2 ${prefix}_2.trim.fastq.gz \\ + --out1 ${prefix}_1.fastp.fastq.gz \\ + --out2 ${prefix}_2.fastp.fastq.gz \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $fail_fastq \\ diff --git a/modules/nf-core/modules/fastp/meta.yml b/modules/nf-core/modules/fastp/meta.yml index f53bb09f..2bd2b1a9 100644 --- a/modules/nf-core/modules/fastp/meta.yml +++ b/modules/nf-core/modules/fastp/meta.yml @@ -22,6 +22,12 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. 
+ - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` output: - meta: @@ -32,7 +38,7 @@ output: - reads: type: file description: The trimmed/modified/unmerged fastq reads - pattern: "*trim.fastq.gz" + pattern: "*fastp.fastq.gz" - json: type: file description: Results in JSON format diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf index ed6b8c50..05730368 100644 --- a/modules/nf-core/modules/fastqc/main.nf +++ b/modules/nf-core/modules/fastqc/main.nf @@ -44,4 +44,16 @@ process FASTQC { END_VERSIONS """ } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf index 61bf1afa..70367049 100644 --- a/modules/nf-core/modules/gunzip/main.nf +++ b/modules/nf-core/modules/gunzip/main.nf @@ -31,4 +31,14 @@ process GUNZIP { gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') END_VERSIONS """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/kraken2/kraken2/main.nf b/modules/nf-core/modules/kraken2/kraken2/main.nf index 3ec5df52..d4000233 100644 --- a/modules/nf-core/modules/kraken2/kraken2/main.nf +++ b/modules/nf-core/modules/kraken2/kraken2/main.nf @@ -10,12 +10,15 @@ process KRAKEN2_KRAKEN2 { input: tuple val(meta), path(reads) path db + val save_output_fastqs + val save_reads_assignment output: - tuple val(meta), path('*classified*') , emit: classified - tuple val(meta), path('*unclassified*'), emit: unclassified - tuple val(meta), path('*report.txt') , emit: txt - path "versions.yml" , emit: versions + tuple val(meta), path('*classified*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*unclassified*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads*'), optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -26,19 +29,25 @@ process KRAKEN2_KRAKEN2 { def paired = meta.single_end ? "" : "--paired" def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_command = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_command = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_command = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "" + def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" + """ kraken2 \\ --db $db \\ --threads $task.cpus \\ - --unclassified-out $unclassified \\ - --classified-out $classified \\ --report ${prefix}.kraken2.report.txt \\ --gzip-compressed \\ + $unclassified_command \\ + $classified_command \\ + $readclassification_command \\ $paired \\ $args \\ $reads - pigz -p $task.cpus *.fastq + $compress_reads_command cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/kraken2/kraken2/meta.yml b/modules/nf-core/modules/kraken2/kraken2/meta.yml index 9d6a3855..7129fe3a 100644 --- a/modules/nf-core/modules/kraken2/kraken2/meta.yml +++ b/modules/nf-core/modules/kraken2/kraken2/meta.yml @@ -27,25 +27,40 @@ input: - db: type: directory description: Kraken2 database + - save_output_fastqs: + type: boolean + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - save_reads_assignment: + type: boolean + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - classified: + - classified_reads_fastq: type: file description: | - Reads classified to belong to any of the taxa + Reads classified as belonging to any of the taxa on the Kraken2 database. pattern: "*{fastq.gz}" - - unclassified: + - unclassified_reads_fastq: type: file description: | - Reads not classified to belong to any of the taxa + Reads not classified to any of the taxa on the Kraken2 database. pattern: "*{fastq.gz}" - - txt: + - classified_reads_assignment: + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: type: file description: | Kraken2 report containing stats about classified diff --git a/modules/nf-core/modules/mosdepth/main.nf b/modules/nf-core/modules/mosdepth/main.nf index ff91e06f..d7e3c929 100644 --- a/modules/nf-core/modules/mosdepth/main.nf +++ b/modules/nf-core/modules/mosdepth/main.nf @@ -10,18 +10,22 @@ process MOSDEPTH { input: tuple val(meta), path(bam), path(bai) path bed - val window_size + path fasta output: - tuple val(meta), path('*.global.dist.txt') , emit: global_txt - tuple val(meta), path('*.region.dist.txt') , emit: regions_txt , optional:true - tuple val(meta), path('*.summary.txt') , emit: summary_txt - tuple val(meta), path('*.per-base.d4') , emit: d4 , optional:true - tuple val(meta), path('*.per-base.bed.gz') , emit: per_base_bed, optional:true - tuple val(meta), path('*.per-base.bed.gz.csi'), emit: per_base_csi, optional:true - tuple val(meta), path('*.regions.bed.gz') , emit: regions_bed , optional:true - tuple val(meta), path('*.regions.bed.gz.csi') , emit: regions_csi , optional:true - path "versions.yml" , emit: versions + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , 
optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,19 +33,24 @@ process MOSDEPTH { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (window_size) { - interval = "--by ${window_size}" - } else if ( bed ) { - interval = "--by ${bed}" - } else { - interval = "" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + exit 1, "'--by' can only be specified once when running mosdepth! Either remove input BED file definition or remove '--by' from 'ext.args' definition" } + if (!bed && args.contains("--thresholds")) { + exit 1, "'--thresholds' can only be specified in conjunction with '--by'" + } + """ mosdepth \\ + --threads $task.cpus \\ $interval \\ + $reference \\ $args \\ $prefix \\ $bam + cat <<-END_VERSIONS > versions.yml "${task.process}": mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') @@ -59,6 +68,10 @@ process MOSDEPTH { touch ${prefix}.per-base.bed.gz.csi touch ${prefix}.regions.bed.gz touch ${prefix}.regions.bed.gz.csi + touch ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + touch ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/mosdepth/meta.yml b/modules/nf-core/modules/mosdepth/meta.yml index 636e966b..d1e33447 100644 --- a/modules/nf-core/modules/mosdepth/meta.yml +++ b/modules/nf-core/modules/mosdepth/meta.yml @@ -30,10 +30,10 @@ input: type: file description: BED file with intersected intervals pattern: "*.{bed}" - - window_size: - type: integer - description: Window size - pattern: "[0-9]+" + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" output: - meta: type: map @@ -60,6 +60,10 @@ output: type: file description: Index file for BED file with per-base coverage pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" - regions_bed: type: file description: BED file with per-region coverage @@ -68,6 +72,22 @@ output: type: file description: Index file for BED file with per-region coverage pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in each region that are covered at or above each threshold + pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" - versions: type: file description: File containing software versions @@ -76,3 +96,4 @@ authors: - "@joseespinosa" - "@drpatelh" - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf index e023ea3c..2f991321 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf +++ 
b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf @@ -2,14 +2,15 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.26.10" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) path fasta + path fai output: tuple val(meta), path("*_metrics"), emit: metrics @@ -22,6 +23,7 @@ process PICARD_COLLECTMULTIPLEMETRICS { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" def avail_mem = 3 if (!task.memory) { log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' @@ -33,13 +35,33 @@ process PICARD_COLLECTMULTIPLEMETRICS { -Xmx${avail_mem}g \\ CollectMultipleMetrics \\ $args \\ - INPUT=$bam \\ - OUTPUT=${prefix}.CollectMultipleMetrics \\ - REFERENCE_SEQUENCE=$fasta + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference cat <<-END_VERSIONS > versions.yml "${task.process}": picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml index 68b5c65e..c11b02cf 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml @@ -28,6 +28,10 @@ input: - fasta: type: file description: Genome fasta file + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" output: - meta: type: map diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf index 5196b6ed..87f913d4 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.26.10" : null) + conda (params.enable_conda ? 
"bioconda::picard=2.27.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.26.10--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.26.10--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" input: tuple val(meta), path(bam) @@ -33,9 +33,22 @@ process PICARD_MARKDUPLICATES { -Xmx${avail_mem}g \\ MarkDuplicates \\ $args \\ - I=$bam \\ - O=${prefix}.bam \\ - M=${prefix}.MarkDuplicates.metrics.txt + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.MarkDuplicates.metrics.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf index c267922b..03ec2dcf 100644 --- a/modules/nf-core/modules/samtools/flagstat/main.nf +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_FLAGSTAT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,12 +19,13 @@ process SAMTOOLS_FLAGSTAT { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" """ samtools \\ flagstat \\ --threads ${task.cpus-1} \\ $bam \\ - > ${bam}.flagstat + > ${prefix}.flagstat cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/modules/samtools/idxstats/main.nf index 8a057413..4b245419 100644 --- a/modules/nf-core/modules/samtools/idxstats/main.nf +++ b/modules/nf-core/modules/samtools/idxstats/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_IDXSTATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam), path(bai) @@ -19,11 +19,13 @@ process SAMTOOLS_IDXSTATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ samtools \\ idxstats \\ $bam \\ - > ${bam}.idxstats + > ${prefix}.idxstats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/modules/samtools/index/main.nf index dfe0234f..e04e63e8 100644 --- a/modules/nf-core/modules/samtools/index/main.nf +++ b/modules/nf-core/modules/samtools/index/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_INDEX { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(input) @@ -33,4 +33,16 @@ process SAMTOOLS_INDEX { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/mpileup/main.nf b/modules/nf-core/modules/samtools/mpileup/main.nf index 77afae60..cfab5c98 100644 --- a/modules/nf-core/modules/samtools/mpileup/main.nf +++ b/modules/nf-core/modules/samtools/mpileup/main.nf @@ -2,18 +2,17 @@ process SAMTOOLS_MPILEUP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" - + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(bam) + tuple val(meta), path(input), path(intervals) path fasta output: - tuple val(meta), path("*.mpileup"), emit: mpileup - path "versions.yml" , emit: versions + tuple val(meta), path("*.mpileup.gz"), emit: mpileup + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,12 +20,14 @@ process SAMTOOLS_MPILEUP { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def intervals = intervals ? 
"-l ${intervals}" : "" """ samtools mpileup \\ --fasta-ref $fasta \\ --output ${prefix}.mpileup \\ $args \\ - $bam + $input + bgzip ${prefix}.mpileup cat <<-END_VERSIONS > versions.yml "${task.process}": samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') diff --git a/modules/nf-core/modules/samtools/mpileup/meta.yml b/modules/nf-core/modules/samtools/mpileup/meta.yml index c384f5c6..ae499e92 100644 --- a/modules/nf-core/modules/samtools/mpileup/meta.yml +++ b/modules/nf-core/modules/samtools/mpileup/meta.yml @@ -21,7 +21,7 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - bam: + - input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" @@ -29,6 +29,10 @@ input: type: file description: FASTA reference file pattern: "*.{fasta,fa}" + - intervals: + type: file + description: Interval FILE + pattern: "*.bed" output: - meta: type: map diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf index 0f2237cc..b4fc1cbe 100644 --- a/modules/nf-core/modules/samtools/sort/main.nf +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -2,10 +2,10 @@ process SAMTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: tuple val(meta), path(bam) @@ -28,4 +28,15 @@ process SAMTOOLS_SORT { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/modules/samtools/stats/main.nf index f6fe3bfe..c913bc5e 100644 --- a/modules/nf-core/modules/samtools/stats/main.nf +++ b/modules/nf-core/modules/samtools/stats/main.nf @@ -2,13 +2,13 @@ process SAMTOOLS_STATS { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input), path(input_index) + tuple val(meta), path(bam), path(bai) path fasta output: @@ -20,14 +20,26 @@ process SAMTOOLS_STATS { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def reference = fasta ? 
"--reference ${fasta}" : "" """ samtools \\ stats \\ --threads ${task.cpus-1} \\ ${reference} \\ - ${input} \\ - > ${input}.stats + ${bam} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/samtools/view/main.nf b/modules/nf-core/modules/samtools/view/main.nf index aee21a4e..55194e88 100644 --- a/modules/nf-core/modules/samtools/view/main.nf +++ b/modules/nf-core/modules/samtools/view/main.nf @@ -2,13 +2,13 @@ process SAMTOOLS_VIEW { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::samtools=1.14" : null) + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.14--hb421002_0' : - 'quay.io/biocontainers/samtools:1.14--hb421002_0' }" + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" input: - tuple val(meta), path(input) + tuple val(meta), path(input), path(index) path fasta output: @@ -41,4 +41,16 @@ process SAMTOOLS_VIEW { samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.cram + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/samtools/view/meta.yml b/modules/nf-core/modules/samtools/view/meta.yml index 5604bfa7..a8b43ecc 100644 --- a/modules/nf-core/modules/samtools/view/meta.yml +++ b/modules/nf-core/modules/samtools/view/meta.yml @@ -25,6 +25,10 @@ input: type: file description: BAM/CRAM/SAM file pattern: "*.{bam,cram,sam}" + - index: + type: optional file + description: BAM.BAI/CRAM.CRAI file + pattern: "*.{.bai,.crai}" - fasta: type: optional file description: Reference file the CRAM was created with diff --git a/modules/nf-core/modules/tabix/bgzip/main.nf b/modules/nf-core/modules/tabix/bgzip/main.nf index 90940a5d..18e83c84 100644 --- a/modules/nf-core/modules/tabix/bgzip/main.nf +++ b/modules/nf-core/modules/tabix/bgzip/main.nf @@ -11,17 +11,20 @@ process TABIX_BGZIP { tuple val(meta), path(input) output: - tuple val(meta), path("*.gz"), emit: gz - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}*"), emit: output + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = input.toString().endsWith(".gz") + command1 = in_bgzip ? '-d' : '-c' + command2 = in_bgzip ? 
'' : " > ${prefix}.${input.getExtension()}.gz" """ - bgzip -c $args $input > ${prefix}.${input.getExtension()}.gz + bgzip $command1 $args -@${task.cpus} $input $command2 cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/tabix/bgzip/meta.yml b/modules/nf-core/modules/tabix/bgzip/meta.yml index 207427e4..50070175 100644 --- a/modules/nf-core/modules/tabix/bgzip/meta.yml +++ b/modules/nf-core/modules/tabix/bgzip/meta.yml @@ -1,13 +1,14 @@ name: tabix_bgzip -description: Compresses files +description: Compresses/decompresses files keywords: - compress + - decompress - bgzip - tabix tools: - bgzip: description: | - Bgzip compresses files in a similar manner to, and compatible with, gzip. + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. homepage: https://www.htslib.org/doc/tabix.html documentation: http://www.htslib.org/doc/bgzip.html doi: 10.1093/bioinformatics/btp352 @@ -18,19 +19,19 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - file: + - input: type: file - description: text file + description: file to compress or to decompress output: - meta: type: map description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - file: + - output: type: file - description: Output compressed file - pattern: "*.{gz}" + description: Output compressed/decompressed file + pattern: "*." - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/tabix/tabix/main.nf b/modules/nf-core/modules/tabix/tabix/main.nf index 5f516261..e155e468 100644 --- a/modules/nf-core/modules/tabix/tabix/main.nf +++ b/modules/nf-core/modules/tabix/tabix/main.nf @@ -11,7 +11,8 @@ process TABIX_TABIX { tuple val(meta), path(tab) output: - tuple val(meta), path("*.tbi"), emit: tbi + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi path "versions.yml" , emit: versions when: @@ -27,4 +28,15 @@ process TABIX_TABIX { tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${tab}.tbi + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ } diff --git a/modules/nf-core/modules/tabix/tabix/meta.yml b/modules/nf-core/modules/tabix/tabix/meta.yml index 89478abe..fcc6e524 100644 --- a/modules/nf-core/modules/tabix/tabix/meta.yml +++ b/modules/nf-core/modules/tabix/tabix/meta.yml @@ -31,6 +31,10 @@ output: type: file description: tabix index file pattern: "*.{tbi}" + - csi: + type: file + description: coordinate sorted index file + pattern: "*.{csi}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf index 058d1764..29ab10a5 100644 --- a/modules/nf-core/modules/untar/main.nf +++ b/modules/nf-core/modules/untar/main.nf @@ -21,12 +21,18 @@ process UNTAR { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' untar = archive.toString() - '.tar.gz' + """ + mkdir output + tar \\ + -C output --strip-components 1 \\ -xzvf \\ $args \\ $archive \\ - $args2 \\ + $args2 + + mv output ${untar} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/modules/vcflib/vcfuniq/main.nf b/modules/nf-core/modules/vcflib/vcfuniq/main.nf index 
a01e8485..05d9b9d5 100644 --- a/modules/nf-core/modules/vcflib/vcfuniq/main.nf +++ b/modules/nf-core/modules/vcflib/vcfuniq/main.nf @@ -1,9 +1,8 @@ -def VERSION = '1.0.2' // Version information not provided by tool on CLI - process VCFLIB_VCFUNIQ { tag "$meta.id" label 'process_low' + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. conda (params.enable_conda ? "bioconda::vcflib=1.0.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 'https://depot.galaxyproject.org/singularity/vcflib:1.0.2--h3198e80_5': @@ -22,6 +21,7 @@ process VCFLIB_VCFUNIQ { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. """ vcfuniq \\ $vcf \\ From 8049c25cd87509e5be82f14637da9ec04c31bea5 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 7 Jul 2022 11:44:27 +0100 Subject: [PATCH 38/58] Intentionally ignore expected nf-core warnings --- .nf-core.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index 40bcac74..192f7e02 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -4,3 +4,7 @@ lint: - assets/email_template.html - assets/email_template.txt - lib/NfcoreTemplate.groovy + files_exist: + - assets/multiqc_config.yml + - conf/igenomes.config + - lib/WorkflowViralrecon.groovy From f106fafb8ca4172dde46887c6ccd62028a4eb1ef Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 7 Jul 2022 14:52:34 +0100 Subject: [PATCH 39/58] Re-install nf-core modules after updating tool versions --- .../nf-core/modules/artic/guppyplex/main.nf | 6 ++-- modules/nf-core/modules/artic/minion/main.nf | 6 ++-- .../modules/bcftools/consensus/main.nf | 6 ++-- .../nf-core/modules/bcftools/filter/main.nf | 6 ++-- .../nf-core/modules/bcftools/mpileup/main.nf | 6 ++-- modules/nf-core/modules/bcftools/norm/main.nf | 6 ++-- .../nf-core/modules/bcftools/query/main.nf | 6 ++-- modules/nf-core/modules/bcftools/sort/main.nf | 6 ++-- .../nf-core/modules/bcftools/stats/main.nf | 6 ++-- .../custom/dumpsoftwareversions/main.nf | 6 ++-- modules/nf-core/modules/nanoplot/main.nf | 6 ++-- .../modules/nextclade/datasetget/main.nf | 8 ++--- modules/nf-core/modules/nextclade/run/main.nf | 32 ++++++++++--------- modules/nf-core/modules/pangolin/main.nf | 6 ++-- .../picard/collectmultiplemetrics/main.nf | 6 ++-- .../modules/picard/markduplicates/main.nf | 6 ++-- modules/nf-core/modules/quast/main.nf | 6 ++-- modules/nf-core/modules/spades/main.nf | 6 ++-- .../nf-core/modules/vcflib/vcfuniq/main.nf | 6 ++-- 19 files changed, 72 insertions(+), 70 deletions(-) diff --git a/modules/nf-core/modules/artic/guppyplex/main.nf b/modules/nf-core/modules/artic/guppyplex/main.nf index 8e6b2879..12f6f68c 100644 --- a/modules/nf-core/modules/artic/guppyplex/main.nf +++ b/modules/nf-core/modules/artic/guppyplex/main.nf @@ -2,10 +2,10 @@ process ARTIC_GUPPYPLEX { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.1" : null) + conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/artic:1.2.1--py_0' : - 'quay.io/biocontainers/artic:1.2.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" input: tuple val(meta), path(fastq_dir) diff --git a/modules/nf-core/modules/artic/minion/main.nf b/modules/nf-core/modules/artic/minion/main.nf index 22a6fd87..9d00ef6a 100644 --- a/modules/nf-core/modules/artic/minion/main.nf +++ b/modules/nf-core/modules/artic/minion/main.nf @@ -2,10 +2,10 @@ process ARTIC_MINION { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? "bioconda::artic=1.2.1" : null) + conda (params.enable_conda ? "bioconda::artic=1.2.2" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/artic:1.2.1--py_0' : - 'quay.io/biocontainers/artic:1.2.1--py_0' }" + 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' : + 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }" input: tuple val(meta), path(fastq) diff --git a/modules/nf-core/modules/bcftools/consensus/main.nf b/modules/nf-core/modules/bcftools/consensus/main.nf index a0c436e2..e28dc7f4 100644 --- a/modules/nf-core/modules/bcftools/consensus/main.nf +++ b/modules/nf-core/modules/bcftools/consensus/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_CONSENSUS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf), path(tbi), path(fasta) diff --git a/modules/nf-core/modules/bcftools/filter/main.nf b/modules/nf-core/modules/bcftools/filter/main.nf index 82961e32..ef99eda2 100644 --- a/modules/nf-core/modules/bcftools/filter/main.nf +++ b/modules/nf-core/modules/bcftools/filter/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_FILTER { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/bcftools/mpileup/main.nf b/modules/nf-core/modules/bcftools/mpileup/main.nf index 676eae7a..b7795bfc 100644 --- a/modules/nf-core/modules/bcftools/mpileup/main.nf +++ b/modules/nf-core/modules/bcftools/mpileup/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_MPILEUP { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/bcftools/norm/main.nf b/modules/nf-core/modules/bcftools/norm/main.nf index c48aa9bf..96f306bc 100644 --- a/modules/nf-core/modules/bcftools/norm/main.nf +++ b/modules/nf-core/modules/bcftools/norm/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_NORM { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/modules/bcftools/query/main.nf b/modules/nf-core/modules/bcftools/query/main.nf index 8921abdd..5de34a9e 100644 --- a/modules/nf-core/modules/bcftools/query/main.nf +++ b/modules/nf-core/modules/bcftools/query/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_QUERY { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf), path(tbi) diff --git a/modules/nf-core/modules/bcftools/sort/main.nf b/modules/nf-core/modules/bcftools/sort/main.nf index 8478fe25..9552b57c 100644 --- a/modules/nf-core/modules/bcftools/sort/main.nf +++ b/modules/nf-core/modules/bcftools/sort/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_SORT { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::bcftools=1.14" : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0': - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/bcftools/stats/main.nf b/modules/nf-core/modules/bcftools/stats/main.nf index 7e150d1f..1e0f3a47 100644 --- a/modules/nf-core/modules/bcftools/stats/main.nf +++ b/modules/nf-core/modules/bcftools/stats/main.nf @@ -2,10 +2,10 @@ process BCFTOOLS_STATS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::bcftools=1.14' : null) + conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/bcftools:1.14--h88f3f91_0' : - 'quay.io/biocontainers/bcftools:1.14--h88f3f91_0' }" + 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0': + 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf index 327d5100..12293efc 100644 --- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -2,10 +2,10 @@ process CUSTOM_DUMPSOFTWAREVERSIONS { label 'process_low' // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.12" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }" input: path versions diff --git a/modules/nf-core/modules/nanoplot/main.nf b/modules/nf-core/modules/nanoplot/main.nf index 083e2374..83c0e2ec 100644 --- a/modules/nf-core/modules/nanoplot/main.nf +++ b/modules/nf-core/modules/nanoplot/main.nf @@ -2,10 +2,10 @@ process NANOPLOT { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? 'bioconda::nanoplot=1.39.0' : null) + conda (params.enable_conda ? 'bioconda::nanoplot=1.40.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nanoplot:1.39.0--pyhdfd78af_0' : - 'quay.io/biocontainers/nanoplot:1.39.0--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/nanoplot:1.40.0--pyhdfd78af_0' : + 'quay.io/biocontainers/nanoplot:1.40.0--pyhdfd78af_0' }" input: tuple val(meta), path(ontfile) diff --git a/modules/nf-core/modules/nextclade/datasetget/main.nf b/modules/nf-core/modules/nextclade/datasetget/main.nf index 4dd82ee3..a9f52c84 100644 --- a/modules/nf-core/modules/nextclade/datasetget/main.nf +++ b/modules/nf-core/modules/nextclade/datasetget/main.nf @@ -2,10 +2,10 @@ process NEXTCLADE_DATASETGET { tag "$dataset" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=1.10.2" : null) + conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/nextclade:1.10.2--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:1.10.2--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" input: val dataset @@ -36,7 +36,7 @@ process NEXTCLADE_DATASETGET { cat <<-END_VERSIONS > versions.yml "${task.process}": - nextclade: \$(nextclade --version 2>&1) + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/modules/nextclade/run/main.nf b/modules/nf-core/modules/nextclade/run/main.nf index 4d4bdb88..22f72781 100644 --- a/modules/nf-core/modules/nextclade/run/main.nf +++ b/modules/nf-core/modules/nextclade/run/main.nf @@ -2,21 +2,26 @@ process NEXTCLADE_RUN { tag "$meta.id" label 'process_low' - conda (params.enable_conda ? "bioconda::nextclade=1.10.2" : null) + conda (params.enable_conda ? "bioconda::nextclade=2.2.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/nextclade:1.10.2--h9ee0642_0' : - 'quay.io/biocontainers/nextclade:1.10.2--h9ee0642_0' }" + 'https://depot.galaxyproject.org/singularity/nextclade:2.2.0--h9ee0642_0' : + 'quay.io/biocontainers/nextclade:2.2.0--h9ee0642_0' }" input: tuple val(meta), path(fasta) path dataset output: - tuple val(meta), path("${prefix}.csv") , emit: csv - tuple val(meta), path("${prefix}.tsv") , emit: tsv - tuple val(meta), path("${prefix}.json") , emit: json - tuple val(meta), path("${prefix}.tree.json"), emit: json_tree - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.csv") , optional:true, emit: csv + tuple val(meta), path("${prefix}.errors.csv") , optional:true, emit: csv_errors + tuple val(meta), path("${prefix}.insertions.csv"), optional:true, emit: csv_insertions + tuple val(meta), path("${prefix}.tsv") , optional:true, emit: tsv + tuple val(meta), path("${prefix}.json") , optional:true, emit: json + tuple val(meta), path("${prefix}.auspice.json") , optional:true, emit: json_auspice + tuple val(meta), path("${prefix}.ndjson") , optional:true, emit: ndjson + tuple val(meta), path("${prefix}.aligned.fasta") , optional:true, emit: fasta_aligned + tuple val(meta), path("*.translation.fasta") , optional:true, emit: fasta_translation + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -29,17 +34,14 @@ process NEXTCLADE_RUN { run \\ $args \\ --jobs $task.cpus \\ - --input-fasta $fasta \\ --input-dataset $dataset \\ - --output-csv ${prefix}.csv \\ - --output-tsv ${prefix}.tsv \\ - --output-json ${prefix}.json \\ - --output-tree ${prefix}.tree.json \\ - --output-basename ${prefix} + --output-all ./ \\ + --output-basename ${prefix} \\ + $fasta cat <<-END_VERSIONS > versions.yml "${task.process}": - nextclade: \$(nextclade --version 2>&1) + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') END_VERSIONS """ } diff --git a/modules/nf-core/modules/pangolin/main.nf b/modules/nf-core/modules/pangolin/main.nf index 5af557ac..6414b5d3 100644 --- a/modules/nf-core/modules/pangolin/main.nf +++ b/modules/nf-core/modules/pangolin/main.nf @@ -2,10 +2,10 @@ process PANGOLIN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? 'bioconda::pangolin=3.1.20' : null) + conda (params.enable_conda ? 
'bioconda::pangolin=4.1.1' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/pangolin:3.1.20--pyhdfd78af_0' : - 'quay.io/biocontainers/pangolin:3.1.20--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/pangolin:4.1.1--pyhdfd78af_0' : + 'quay.io/biocontainers/pangolin:4.1.1--pyhdfd78af_0' }" input: tuple val(meta), path(fasta) diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf index 2f991321..63f4e872 100644 --- a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf @@ -2,10 +2,10 @@ process PICARD_COLLECTMULTIPLEMETRICS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf index 87f913d4..4e559fea 100644 --- a/modules/nf-core/modules/picard/markduplicates/main.nf +++ b/modules/nf-core/modules/picard/markduplicates/main.nf @@ -2,10 +2,10 @@ process PICARD_MARKDUPLICATES { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::picard=2.27.2" : null) + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/picard:2.27.2--hdfd78af_0' : - 'quay.io/biocontainers/picard:2.27.2--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" input: tuple val(meta), path(bam) diff --git a/modules/nf-core/modules/quast/main.nf b/modules/nf-core/modules/quast/main.nf index beb797d4..5585491b 100644 --- a/modules/nf-core/modules/quast/main.nf +++ b/modules/nf-core/modules/quast/main.nf @@ -1,10 +1,10 @@ process QUAST { label 'process_medium' - conda (params.enable_conda ? 'bioconda::quast=5.0.2' : null) + conda (params.enable_conda ? 'bioconda::quast=5.2.0' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/quast:5.0.2--py37pl526hb5aa323_2' : - 'quay.io/biocontainers/quast:5.0.2--py37pl526hb5aa323_2' }" + 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' : + 'quay.io/biocontainers/quast:5.2.0--py39pl5321h2add14b_1' }" input: path consensus diff --git a/modules/nf-core/modules/spades/main.nf b/modules/nf-core/modules/spades/main.nf index b7ece6f6..a467fcd7 100644 --- a/modules/nf-core/modules/spades/main.nf +++ b/modules/nf-core/modules/spades/main.nf @@ -2,10 +2,10 @@ process SPADES { tag "$meta.id" label 'process_high' - conda (params.enable_conda ? 'bioconda::spades=3.15.3' : null) + conda (params.enable_conda ? 
'bioconda::spades=3.15.4' : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/spades:3.15.3--h95f258a_0' : - 'quay.io/biocontainers/spades:3.15.3--h95f258a_0' }" + 'https://depot.galaxyproject.org/singularity/spades:3.15.4--h95f258a_0' : + 'quay.io/biocontainers/spades:3.15.4--h95f258a_0' }" input: tuple val(meta), path(illumina), path(pacbio), path(nanopore) diff --git a/modules/nf-core/modules/vcflib/vcfuniq/main.nf b/modules/nf-core/modules/vcflib/vcfuniq/main.nf index 05d9b9d5..707f074b 100644 --- a/modules/nf-core/modules/vcflib/vcfuniq/main.nf +++ b/modules/nf-core/modules/vcflib/vcfuniq/main.nf @@ -3,10 +3,10 @@ process VCFLIB_VCFUNIQ { label 'process_low' // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. - conda (params.enable_conda ? "bioconda::vcflib=1.0.2" : null) + conda (params.enable_conda ? "bioconda::vcflib=1.0.3" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/vcflib:1.0.2--h3198e80_5': - 'quay.io/biocontainers/vcflib:1.0.2--h3198e80_5' }" + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'quay.io/biocontainers/vcflib:1.0.3--hecb563c_1' }" input: tuple val(meta), path(vcf), path(tbi) From 7561671f8f48f6bdf1e1a83ac7687fa41ecdc60d Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Thu, 7 Jul 2022 14:52:45 +0100 Subject: [PATCH 40/58] Re-install nf-core modules after updating tool versions --- modules.json | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/modules.json b/modules.json index b2c9e08f..b32025ab 100644 --- a/modules.json +++ b/modules.json @@ -7,34 +7,34 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "artic/guppyplex": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "artic/minion": { - "git_sha": "cab399507bea60d90de6d7b296163210c371b693" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bandage/image": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "bcftools/consensus": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/filter": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/mpileup": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/norm": { - "git_sha": "d596571a03161ee9992b0720c05084cb2ddef324" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/query": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/sort": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bcftools/stats": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "bedtools/getfasta": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -61,7 +61,7 @@ "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9" }, "custom/dumpsoftwareversions": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, 
"custom/getchromsizes": { "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" @@ -94,22 +94,22 @@ "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098" }, "nanoplot": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "nextclade/datasetget": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "nextclade/run": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "pangolin": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "picard/collectmultiplemetrics": { - "git_sha": "43d05d5482de19c5d316a1a92ea6045cbbee6b96" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "picard/markduplicates": { - "git_sha": "63db63757cab03cfa7a02c0d0f134b66fbfadea6" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "plasmidid": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" @@ -118,7 +118,7 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "quast": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "samtools/flagstat": { "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" @@ -142,7 +142,7 @@ "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce" }, "spades": { - "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" }, "tabix/bgzip": { "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c" @@ -157,7 +157,7 @@ "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" }, "vcflib/vcfuniq": { - "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" } } } From f77036726b6d7db5e69ac12997a2a27fe627c39c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 17:16:15 +0100 Subject: [PATCH 41/58] Fix tests after updating tool versions for all modules --- conf/modules_illumina.config | 12 +++-- conf/modules_nanopore.config | 5 +- modules.json | 3 -- modules/local/multiqc_illumina.nf | 6 +-- modules/local/multiqc_nanopore.nf | 6 +-- modules/local/snpeff_ann.nf | 6 +-- modules/local/snpeff_build.nf | 6 +-- modules/local/snpsift_extractfields.nf | 6 +-- .../nf-core/modules/samtools/mpileup/main.nf | 36 ------------- .../nf-core/modules/samtools/mpileup/meta.yml | 52 ------------------- subworkflows/local/variants_bcftools.nf | 2 +- subworkflows/nf-core/align_bowtie2.nf | 4 +- subworkflows/nf-core/filter_bam_samtools.nf | 7 +-- subworkflows/nf-core/vcf_bgzip_tabix_stats.nf | 4 +- workflows/illumina.nf | 20 ++++--- workflows/nanopore.nf | 7 +-- 16 files changed, 50 insertions(+), 132 deletions(-) delete mode 100644 modules/nf-core/modules/samtools/mpileup/main.nf delete mode 100644 modules/nf-core/modules/samtools/mpileup/meta.yml diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index d5ac117a..691f3345 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -146,7 +146,7 @@ if (!params.skip_variants) { withName: 'BOWTIE2_ALIGN' { ext.args = '--local --very-sensitive-local --seed 1' - ext.args2 = '-F4' + ext.args2 = '-F4 -bhS' publishDir = [ [ path: { "${params.outdir}/variants/bowtie2/log" }, @@ -180,6 +180,7 @@ if (!params.skip_variants) { } withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } publishDir = [ path: { 
"${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -244,6 +245,7 @@ if (!params.skip_variants) { } withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.ivar_trim.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -257,7 +259,7 @@ if (!params.skip_variants) { process { withName: 'PICARD_MARKDUPLICATES' { ext.args = [ - 'ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp', + '--ASSUME_SORTED true --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp', params.filter_duplicates ? 'REMOVE_DUPLICATES=true' : '' ].join(' ').trim() ext.prefix = { "${meta.id}.markduplicates.sorted" } @@ -276,7 +278,6 @@ if (!params.skip_variants) { } withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.markduplicates.sorted" } publishDir = [ path: { "${params.outdir}/variants/bowtie2" }, mode: params.publish_dir_mode, @@ -285,6 +286,7 @@ if (!params.skip_variants) { } withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.markduplicates.sorted.bam" } publishDir = [ path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, mode: params.publish_dir_mode, @@ -297,7 +299,7 @@ if (!params.skip_variants) { if (!params.skip_picard_metrics) { process { withName: 'PICARD_COLLECTMULTIPLEMETRICS' { - ext.args = 'VALIDATION_STRINGENCY=LENIENT TMP_DIR=tmp' + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' publishDir = [ [ path: { "${params.outdir}/variants/bowtie2/picard_metrics" }, @@ -317,7 +319,7 @@ if (!params.skip_variants) { if (!params.skip_mosdepth) { process { withName: 'MOSDEPTH_GENOME' { - ext.args = '--fast-mode' + ext.args = '--fast-mode --by 200' publishDir = [ path: { "${params.outdir}/variants/bowtie2/mosdepth/genome" }, mode: params.publish_dir_mode, diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 4168ac7c..16df6353 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -91,7 +91,6 @@ process { } withName: '.*:.*:.*:SAMTOOLS_INDEX' { - ext.prefix = { "${meta.id}.mapped.sorted" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}" }, mode: params.publish_dir_mode, @@ -100,7 +99,7 @@ process { } withName: '.*:.*:.*:BAM_STATS_SAMTOOLS:.*' { - ext.prefix = { "${meta.id}.mapped.sorted" } + ext.prefix = { "${meta.id}.mapped.sorted.bam" } publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/samtools_stats" }, mode: params.publish_dir_mode, @@ -168,7 +167,7 @@ if (!params.skip_mosdepth) { } withName: 'MOSDEPTH_GENOME' { - ext.args = '--fast-mode' + ext.args = '--fast-mode --by 200' publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/genome" }, mode: params.publish_dir_mode, diff --git a/modules.json b/modules.json index b32025ab..e0646ad4 100644 --- a/modules.json +++ b/modules.json @@ -129,9 +129,6 @@ "samtools/index": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, - "samtools/mpileup": { - "git_sha": "24e05f6097a5dde57dd80d33295ed120f1b81aef" - }, "samtools/sort": { "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" }, diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf index 6591d294..59a031c2 100644 --- a/modules/local/multiqc_illumina.nf +++ b/modules/local/multiqc_illumina.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? 
"bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf index cbd2b19d..e23db35c 100644 --- a/modules/local/multiqc_nanopore.nf +++ b/modules/local/multiqc_nanopore.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_medium' - conda (params.enable_conda ? "bioconda::multiqc=1.11" : null) + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.11--pyhdfd78af_0' : - 'quay.io/biocontainers/multiqc:1.11--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" input: path 'multiqc_config.yaml' diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index 4ccf4db4..b3311a79 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -2,10 +2,10 @@ process SNPEFF_ANN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }" input: tuple val(meta), path(vcf) diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index faaeba19..5d95260f 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -2,10 +2,10 @@ process SNPEFF_BUILD { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) + conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : - 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : + 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }" input: path fasta diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 573063e0..959010dc 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -2,10 +2,10 @@ process SNPSIFT_EXTRACTFIELDS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null) + conda (params.enable_conda ? "bioconda::snpsift=5.1" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' : - 'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }" + 'https://depot.galaxyproject.org/singularity/snpsift:5.1--hdfd78af_0' : + 'quay.io/biocontainers/snpsift:5.1--hdfd78af_0' }" input: tuple val(meta), path(vcf) diff --git a/modules/nf-core/modules/samtools/mpileup/main.nf b/modules/nf-core/modules/samtools/mpileup/main.nf deleted file mode 100644 index cfab5c98..00000000 --- a/modules/nf-core/modules/samtools/mpileup/main.nf +++ /dev/null @@ -1,36 +0,0 @@ -process SAMTOOLS_MPILEUP { - tag "$meta.id" - label 'process_medium' - - conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : - 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" - input: - tuple val(meta), path(input), path(intervals) - path fasta - - output: - tuple val(meta), path("*.mpileup.gz"), emit: mpileup - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def intervals = intervals ? "-l ${intervals}" : "" - """ - samtools mpileup \\ - --fasta-ref $fasta \\ - --output ${prefix}.mpileup \\ - $args \\ - $input - bgzip ${prefix}.mpileup - cat <<-END_VERSIONS > versions.yml - "${task.process}": - samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/modules/samtools/mpileup/meta.yml b/modules/nf-core/modules/samtools/mpileup/meta.yml deleted file mode 100644 index ae499e92..00000000 --- a/modules/nf-core/modules/samtools/mpileup/meta.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: samtools_mpileup -description: BAM -keywords: - - mpileup - - bam - - sam - - cram -tools: - - samtools: - description: | - SAMtools is a set of utilities for interacting with and post-processing - short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. - These files are generated as output by short read aligners like BWA. - homepage: http://www.htslib.org/ - documentation: hhttp://www.htslib.org/doc/samtools.html - doi: 10.1093/bioinformatics/btp352 - licence: ["MIT"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input: - type: file - description: BAM/CRAM/SAM file - pattern: "*.{bam,cram,sam}" - - fasta: - type: file - description: FASTA reference file - pattern: "*.{fasta,fa}" - - intervals: - type: file - description: Interval FILE - pattern: "*.bed" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - - mpileup: - type: file - description: mpileup file - pattern: "*.{mpileup}" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@drpatelh" - - "@joseespinosa" diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf index 662c860a..868ff522 100644 --- a/subworkflows/local/variants_bcftools.nf +++ b/subworkflows/local/variants_bcftools.nf @@ -56,7 +56,7 @@ workflow VARIANTS_BCFTOOLS { // Split multi-allelic positions // BCFTOOLS_NORM ( - ch_vcf, + ch_vcf.join(ch_tbi, by: [0]), fasta ) ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf index 1ec4764b..af2f676d 100644 --- a/subworkflows/nf-core/align_bowtie2.nf +++ b/subworkflows/nf-core/align_bowtie2.nf @@ -10,6 +10,7 @@ workflow ALIGN_BOWTIE2 { reads // channel: [ val(meta), [ reads ] ] index // channel: /path/to/bowtie2/index/ save_unaligned // value: boolean + sort_bam // value: boolean main: @@ -21,7 +22,8 @@ workflow ALIGN_BOWTIE2 { BOWTIE2_ALIGN ( reads, index, - save_unaligned + save_unaligned, + sort_bam ) ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) diff --git a/subworkflows/nf-core/filter_bam_samtools.nf b/subworkflows/nf-core/filter_bam_samtools.nf index cfa8b568..050bf085 100644 --- a/subworkflows/nf-core/filter_bam_samtools.nf +++ b/subworkflows/nf-core/filter_bam_samtools.nf @@ -8,7 +8,8 @@ include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' workflow FILTER_BAM_SAMTOOLS { take: - bam // channel: [ val(meta), [ bam ] ] + bam_bai // channel: [ val(meta), [ bam ], [ bai ] ] + fasta // path : fasta main: @@ -18,8 +19,8 @@ workflow FILTER_BAM_SAMTOOLS { // Filter BAM using Samtools view // SAMTOOLS_VIEW ( - bam, - [] + bam_bai, + fasta ) ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) diff --git a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf index 6df4bad7..67e4f992 100644 --- a/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf +++ b/subworkflows/nf-core/vcf_bgzip_tabix_stats.nf @@ -19,12 +19,12 @@ workflow VCF_BGZIP_TABIX_STATS { ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) VCF_TABIX_STATS ( - TABIX_BGZIP.out.gz + TABIX_BGZIP.out.output ) ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) emit: - vcf = TABIX_BGZIP.out.gz // channel: [ val(meta), [ vcf.gz ] ] + vcf = TABIX_BGZIP.out.output // channel: [ val(meta), [ vcf.gz ] ] tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 81fb9368..1fa55548 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -251,17 +251,19 @@ workflow ILLUMINA { if (!params.skip_kraken2) { KRAKEN2_KRAKEN2 ( ch_variants_fastq, - PREPARE_GENOME.out.kraken2_db + PREPARE_GENOME.out.kraken2_db, + params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter, + params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter ) - ch_kraken2_multiqc = KRAKEN2_KRAKEN2.out.txt + ch_kraken2_multiqc = KRAKEN2_KRAKEN2.out.report ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first().ifEmpty(null)) if (params.kraken2_variants_host_filter) { - ch_variants_fastq = KRAKEN2_KRAKEN2.out.unclassified + ch_variants_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq } if 
(params.kraken2_assembly_host_filter) { - ch_assembly_fastq = KRAKEN2_KRAKEN2.out.unclassified + ch_assembly_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq } } @@ -276,7 +278,8 @@ workflow ILLUMINA { ALIGN_BOWTIE2 ( ch_variants_fastq, PREPARE_GENOME.out.bowtie2_index, - params.save_unaligned + params.save_unaligned, + false ) ch_bam = ALIGN_BOWTIE2.out.bam ch_bai = ALIGN_BOWTIE2.out.bai @@ -358,7 +361,8 @@ workflow ILLUMINA { if (!params.skip_variants && !params.skip_picard_metrics) { PICARD_COLLECTMULTIPLEMETRICS ( ch_bam, - PREPARE_GENOME.out.fasta + PREPARE_GENOME.out.fasta, + [] ) ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first().ifEmpty(null)) } @@ -372,7 +376,7 @@ workflow ILLUMINA { MOSDEPTH_GENOME ( ch_bam.join(ch_bai, by: [0]), [], - 200 + [] ) ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null)) @@ -386,7 +390,7 @@ workflow ILLUMINA { MOSDEPTH_AMPLICON ( ch_bam.join(ch_bai, by: [0]), PREPARE_GENOME.out.primer_collapsed_bed, - 0 + [] ) ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null)) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 555c6472..34b50b93 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -342,7 +342,8 @@ workflow NANOPORE { // SUBWORKFLOW: Filter unmapped reads from BAM // FILTER_BAM_SAMTOOLS ( - ARTIC_MINION.out.bam + ARTIC_MINION.out.bam.join(ARTIC_MINION.out.bai, by: [0]), + [] ) ch_versions = ch_versions.mix(FILTER_BAM_SAMTOOLS.out.versions) @@ -356,7 +357,7 @@ workflow NANOPORE { MOSDEPTH_GENOME ( ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), [], - 200 + [] ) ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first().ifEmpty(null)) @@ -369,7 +370,7 @@ workflow NANOPORE { MOSDEPTH_AMPLICON ( ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]), PREPARE_GENOME.out.primer_collapsed_bed, - 0 + [] ) ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first().ifEmpty(null)) From 92a0ce3e85e5fa38ed20be2bb4d7c2553bf96958 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 17:20:26 +0100 Subject: [PATCH 42/58] Revert version bumps for SnpEff and SnpSift --- modules/local/snpeff_ann.nf | 6 +++--- modules/local/snpeff_build.nf | 6 +++--- modules/local/snpsift_extractfields.nf | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index b3311a79..0bc48627 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -2,10 +2,10 @@ process SNPEFF_ANN { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) + conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : - 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }" + 'https://depot.galaxyproject.org/singularity/5.0--hdfd78af_1' : + 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" input: tuple val(meta), path(vcf) diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index 5d95260f..399ff062 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -2,10 +2,10 @@ process SNPEFF_BUILD { tag "$fasta" label 'process_low' - conda (params.enable_conda ? "bioconda::snpeff=5.1" : null) + conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpeff:5.1--hdfd78af_2' : - 'quay.io/biocontainers/snpeff:5.1--hdfd78af_2' }" + 'https://depot.galaxyproject.org/singularity/5.0--hdfd78af_1' : + 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" input: path fasta diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf index 959010dc..573063e0 100644 --- a/modules/local/snpsift_extractfields.nf +++ b/modules/local/snpsift_extractfields.nf @@ -2,10 +2,10 @@ process SNPSIFT_EXTRACTFIELDS { tag "$meta.id" label 'process_medium' - conda (params.enable_conda ? "bioconda::snpsift=5.1" : null) + conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/snpsift:5.1--hdfd78af_0' : - 'quay.io/biocontainers/snpsift:5.1--hdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' : + 'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }" input: tuple val(meta), path(vcf) From 734a4da564a343b82bf08edc55c016a6fb3e36ac Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 17:34:39 +0100 Subject: [PATCH 43/58] Fix incorrect container definitions for SnpEff and SnpSift --- CHANGELOG.md | 24 ++++++++++++++++++++++++ modules/local/snpeff_ann.nf | 2 +- modules/local/snpeff_build.nf | 2 +- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25148ab4..9d7dd67f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer - Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `artic` | 1.2.1 | 1.2.2 | +| `bcftools` | 1.14 | 1.15.1 | +| `multiqc` | 1.11 | 1.13a | +| `nanoplot` | 1.39.0 | 1.40.0 | +| `nextclade` | 1.10.2 | 2.2.0 | +| `pangolin` | 3.1.20 | 4.1.1 | +| `picard` | 2.26.10 | 2.27.4 | +| `quast` | 5.0.2 | 5.2.0 | +| `samtools` | 1.14 | 1.15.1 | +| `spades` | 3.15.3 | 3.15.4 | +| `vcflib` | 1.0.2 | 1.0.3 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + ### Parameters ## [[2.4.1](https://github.com/nf-core/viralrecon/releases/tag/2.4.1)] - 2022-03-01 diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf index 0bc48627..4ccf4db4 100644 --- a/modules/local/snpeff_ann.nf +++ b/modules/local/snpeff_ann.nf @@ -4,7 +4,7 @@ process SNPEFF_ANN { conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/5.0--hdfd78af_1' : + 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" input: diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf index 399ff062..faaeba19 100644 --- a/modules/local/snpeff_build.nf +++ b/modules/local/snpeff_build.nf @@ -4,7 +4,7 @@ process SNPEFF_BUILD { conda (params.enable_conda ? "bioconda::snpeff=5.0" : null) container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/5.0--hdfd78af_1' : + 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" input: From f2c5adf29d2ade3542f32b78120ebba3d9aa6d2c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 17:37:15 +0100 Subject: [PATCH 44/58] Fix ECLint --- .prettierignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.prettierignore b/.prettierignore index 8e8d979d..d0e7ae58 100644 --- a/.prettierignore +++ b/.prettierignore @@ -6,4 +6,4 @@ results/ .DS_Store testing/ testing* -*.pyc \ No newline at end of file +*.pyc From a1f810b2b2eb56a2769979fc04dc656d6de424f6 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Fri, 8 Jul 2022 20:39:51 +0100 Subject: [PATCH 45/58] Fix #292 --- CHANGELOG.md | 5 +++-- subworkflows/nf-core/fastqc_fastp.nf | 23 +++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d7dd67f..76cabfe3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes -- [[#304](https://github.com/nf-core/viralrecon/pull/304)] Re-factor code of `ivar_variants_to_vcf` script. -- [[#308](https://github.com/nf-core/viralrecon/pull/304)] Added contig tag to vcf in `ivar_variants_to_vcf` script and bcftools sort module for vcf sorting. 
+- [[#292](https://github.com/nf-core/viralrecon/issues/292)] - Filter empty FastQ files after adapter trimming
+- [[#304](https://github.com/nf-core/viralrecon/pull/304)] - Re-factor code of `ivar_variants_to_vcf` script
+- [[#306](https://github.com/nf-core/viralrecon/issues/306)] - Add contig field information in vcf header in ivar_variants_to_vcf and use bcftools sort
 - [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer
 - Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1)
diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf
index 45be0c14..097c927e 100644
--- a/subworkflows/nf-core/fastqc_fastp.nf
+++ b/subworkflows/nf-core/fastqc_fastp.nf
@@ -6,6 +6,16 @@ include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/modules/fastqc/mai
 include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/modules/fastqc/main'
 include { FASTP } from '../../modules/nf-core/modules/fastp/main'

+//
+// Function that parses fastp json output file to get total number of reads after trimming
+//
+import groovy.json.JsonSlurper
+
+def getFastpReadsAfterFiltering(json_file) {
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary')
+    return json['after_filtering']['total_reads'].toInteger()
+}
+
 workflow FASTQC_FASTP {
     take:
     reads // channel: [ val(meta), [ reads ] ]
@@ -49,6 +59,19 @@ workflow FASTQC_FASTP {
     trim_reads_merged = FASTP.out.reads_merged
     ch_versions = ch_versions.mix(FASTP.out.versions.first())

+        //
+        // Filter empty FastQ files after adapter trimming so FastQC doesn't fail
+        //
+        trim_reads
+            .join(trim_json)
+            .map {
+                meta, reads, json ->
+                    if (getFastpReadsAfterFiltering(json) > 400000) {
+                        [ meta, reads ]
+                    }
+            }
+            .set { trim_reads }
+
     if (!params.skip_fastqc) {
         FASTQC_TRIM (
             trim_reads

From 6a0bc3151331ab2610caab84a4e76bc258d98eaa Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Fri, 8 Jul 2022 20:52:16 +0100
Subject: [PATCH 46/58] Fix #292 again

---
 subworkflows/nf-core/fastqc_fastp.nf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/subworkflows/nf-core/fastqc_fastp.nf b/subworkflows/nf-core/fastqc_fastp.nf
index 097c927e..13ba31eb 100644
--- a/subworkflows/nf-core/fastqc_fastp.nf
+++ b/subworkflows/nf-core/fastqc_fastp.nf
@@ -66,7 +66,7 @@ workflow FASTQC_FASTP {
             .join(trim_json)
             .map {
                 meta, reads, json ->
-                    if (getFastpReadsAfterFiltering(json) > 400000) {
+                    if (getFastpReadsAfterFiltering(json) > 0) {
                         [ meta, reads ]
                     }
             }
             .set { trim_reads }

From af9cfe87205bf31f1fe89bc8f8129d48424b8a9b Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Mon, 11 Jul 2022 11:33:30 +0100
Subject: [PATCH 47/58] Fix config bug - see issue 1722 on MultiQC repo

---
 conf/modules_illumina.config | 5 ++++-
 conf/modules_nanopore.config | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config
index 691f3345..b839ddba 100644
--- a/conf/modules_illumina.config
+++ b/conf/modules_illumina.config
@@ -1050,7 +1050,10 @@ if (!params.skip_assembly) {
 if (!params.skip_multiqc) {
     process {
         withName: 'MULTIQC' {
-            ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+            ext.args = [
+                '-k yaml',
+                params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+            ].join(' ').trim()
             publishDir = [
                 [
                     path: { "${params.outdir}/multiqc" },
diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config
index 16df6353..5b396618 100644
--- a/conf/modules_nanopore.config
+++ b/conf/modules_nanopore.config
@@ -361,7 +361,10 @@ if (!params.skip_asciigenome) {
 if (!params.skip_multiqc) {
     process {
         withName: 'MULTIQC' {
-            ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+            ext.args = [
+                '-k yaml',
+                params.multiqc_title ? "--title \"$params.multiqc_title\"" : ''
+            ].join(' ').trim()
             publishDir = [
                 path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" },
                 mode: params.publish_dir_mode,

From 95c0cc2255ca0542509dc40a8e5b952f9f1b06fa Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Mon, 11 Jul 2022 11:34:01 +0100
Subject: [PATCH 48/58] Add print statement back into ivar_variants_to_vcf.py
 script

---
 bin/ivar_variants_to_vcf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py
index f00dcaf0..a900f502 100755
--- a/bin/ivar_variants_to_vcf.py
+++ b/bin/ivar_variants_to_vcf.py
@@ -569,8 +569,8 @@ def main(args=None):
     ## variant counts to pass to MultiQC ##
     #############################################
     var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())]
-    ("\t".join(["sample"] + [x[0] for x in var_count_list]))
-    ("\t".join([filename] + [x[1] for x in var_count_list]))
+    print("\t".join(["sample"] + [x[0] for x in var_count_list]))
+    print("\t".join([filename] + [x[1] for x in var_count_list]))


 if __name__ == "__main__":

From 7464f88c38a959c00f917af4e5fe04a523d9319c Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Mon, 11 Jul 2022 12:01:28 +0100
Subject: [PATCH 49/58] Fix Pangolin and Nextclade lineage parsing issues

---
 assets/multiqc_config_illumina.yml | 1 +
 bin/multiqc_to_custom_csv.py       | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/assets/multiqc_config_illumina.yml b/assets/multiqc_config_illumina.yml
index 3cff53aa..1979a953 100644
--- a/assets/multiqc_config_illumina.yml
+++ b/assets/multiqc_config_illumina.yml
@@ -283,6 +283,7 @@ extra_fn_clean_exts:
   - ".markduplicates"
   - ".unclassified"
   - "_MN908947.3"
+  - " MN908947.3"

 extra_fn_clean_trim:
   - "Consensus_"
diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py
index dac8b7ae..90a4c21a 100755
--- a/bin/multiqc_to_custom_csv.py
+++ b/bin/multiqc_to_custom_csv.py
@@ -239,7 +239,7 @@ def main(args=None):
             "multiqc_pangolin.yaml",
             [("Pangolin lineage", ["lineage"])],
         ),
-        ("multiqc_nextclade_clade.yaml", [("Nextclade clade", ["clade"])]),
+        ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]),
     ]

     illumina_assembly_files = [
@@ -308,7 +308,7 @@ def main(args=None):
         ("multiqc_snpeff.yaml", [("# Missense variants", ["MISSENSE"])]),
         ("multiqc_quast.yaml", [("# Ns per 100kb consensus", ["# N's per 100 kbp"])]),
         ("multiqc_pangolin.yaml", [("Pangolin lineage", ["lineage"])]),
-        ("multiqc_nextclade_clade.yaml", [("Nextclade clade", ["clade"])]),
+        ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]),
     ]

     if args.PLATFORM == "illumina":

From 678e2351e8ffc52171167deac434592946f959c6 Mon Sep 17 00:00:00 2001
From: Harshil Patel
Date: Mon, 11 Jul 2022 12:12:02 +0100
Subject: [PATCH 50/58] Fix publishDir bug introduced in PR 308

---
 conf/modules_illumina.config | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/conf/modules_illumina.config 
b/conf/modules_illumina.config index b839ddba..7d0ebcca 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -398,7 +398,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:.*:TABIX_TABIX' { + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_TABIX' { ext.args = '-p vcf -f' publishDir = [ path: { "${params.outdir}/variants/ivar" }, @@ -407,7 +407,7 @@ if (!params.skip_variants) { ] } - withName: '.*:.*:VARIANTS_IVAR:.*:.*:BCFTOOLS_STATS' { + withName: '.*:.*:VARIANTS_IVAR:.*:BCFTOOLS_STATS' { publishDir = [ path: { "${params.outdir}/variants/ivar/bcftools_stats" }, mode: params.publish_dir_mode, From cb7efb78b5535935406526a1fb1db3ace6bfea0b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 12:36:17 +0100 Subject: [PATCH 51/58] Fix #311 --- workflows/nanopore.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf index 34b50b93..9bb63231 100644 --- a/workflows/nanopore.nf +++ b/workflows/nanopore.nf @@ -308,7 +308,7 @@ workflow NANOPORE { PREPARE_GENOME.out.fasta, PREPARE_GENOME.out.primer_bed, ch_medaka_model.collect().ifEmpty([]), - params.artic_minion_medaka_model, + params.artic_minion_medaka_model ?: '', params.artic_scheme, params.primer_set_version ) From 81e03d5cf5faa1a712acb1e9bb101ffeb398da5a Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 14:39:50 +0100 Subject: [PATCH 52/58] Fix #234 --- CHANGELOG.md | 3 +++ bin/check_samplesheet.py | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76cabfe3..dcedaac8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- [[#234](https://github.com/nf-core/viralrecon/issues/234)] - Remove replacement of dashes in sample name with underscores - [[#292](https://github.com/nf-core/viralrecon/issues/292)] - Filter empty FastQ files after adapter trimming +- [[#303](https://github.com/nf-core/viralrecon/pull/303)] - New pangolin dbs (4.0.x) not assigning lineages to Sars-CoV-2 samples in MultiQC report correctly - [[#304](https://github.com/nf-core/viralrecon/pull/304)] - Re-factor code of `ivar_variants_to_vcf` script - [[#306](https://github.com/nf-core/viralrecon/issues/306)] - Add contig field information in vcf header in ivar_variants_to_vcf and use bcftools sort +- [[#311](https://github.com/nf-core/viralrecon/issues/311)] - Invalid declaration val medaka_model_string - [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer - Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py index f6f797b2..261ddb95 100755 --- a/bin/check_samplesheet.py +++ b/bin/check_samplesheet.py @@ -99,11 +99,6 @@ def check_illumina_samplesheet(file_in, file_out): f"WARNING: Spaces have been replaced by underscores for sample: {sample}" ) sample = sample.replace(" ", "_") - if sample.find("-") != -1: - print( - f"WARNING: Dashes have been replaced by underscores for sample: {sample}" - ) - sample = sample.replace("-", "_") if not sample: print_error("Sample entry has not been specified!", "Line", line) From ab8894633af1bd3482fa38d2243115968fb9b00e Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 14:40:58 +0100 Subject: [PATCH 53/58] Bump pipeline 
version to 2.5 --- CHANGELOG.md | 2 +- nextflow.config | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dcedaac8..8264667b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [Unpublished Version / DEV] +## [[2.5](https://github.com/nf-core/viralrecon/releases/tag/2.5)] - 2022-07-13 ### Enhancements & fixes diff --git a/nextflow.config b/nextflow.config index 6daa418c..45c356a8 100644 --- a/nextflow.config +++ b/nextflow.config @@ -235,7 +235,7 @@ manifest { description = 'Assembly and intrahost/low-frequency variant calling for viral samples' mainScript = 'main.nf' nextflowVersion = '!>=21.10.3' - version = '2.5dev' + version = '2.5' } // Load modules.config for DSL2 module specific options From cf4e8dfb51fcce62a000a4f286c5a63485422d5f Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 18:03:59 +0100 Subject: [PATCH 54/58] Fix publishing config relative to v2.4.1 --- conf/modules_illumina.config | 4 ++-- conf/modules_nanopore.config | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config index 7d0ebcca..5cd076ae 100644 --- a/conf/modules_illumina.config +++ b/conf/modules_illumina.config @@ -122,7 +122,7 @@ if (!params.skip_kraken2) { publishDir = [ path: { "${params.outdir}/kraken2" }, mode: params.publish_dir_mode, - pattern: "*.txt" + pattern: "*report.txt" ] } } @@ -667,7 +667,7 @@ if (!params.skip_variants) { publishDir = [ path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/nextclade" }, mode: params.publish_dir_mode, - pattern: "*.csv" + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } ] } diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config index 5b396618..98d06e91 100644 --- a/conf/modules_nanopore.config +++ b/conf/modules_nanopore.config @@ -240,7 +240,7 @@ if (!params.skip_nextclade) { publishDir = [ path: { "${params.outdir}/${params.artic_minion_caller}/nextclade" }, mode: params.publish_dir_mode, - pattern: "*.csv" + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? 
filename : null } ] } From 62b73dc59edf9b4f10deea31dc579e6e54056d65 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Mon, 11 Jul 2022 22:42:38 +0100 Subject: [PATCH 55/58] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8264667b..9cc79312 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Enhancements & fixes +- Default Nextclade dataset shipped with the pipeline has been bumped from `2022-01-18T12:00:00Z` -> `2022-06-14T12:00:00Z` - [[#234](https://github.com/nf-core/viralrecon/issues/234)] - Remove replacement of dashes in sample name with underscores - [[#292](https://github.com/nf-core/viralrecon/issues/292)] - Filter empty FastQ files after adapter trimming - [[#303](https://github.com/nf-core/viralrecon/pull/303)] - New pangolin dbs (4.0.x) not assigning lineages to Sars-CoV-2 samples in MultiQC report correctly From 1a8a3c08065098b4990a88476dab6edf2265845b Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 12 Jul 2022 11:35:41 +0100 Subject: [PATCH 56/58] Add manual version definition for artic modules --- modules.json | 4 ++-- modules/nf-core/modules/artic/guppyplex/main.nf | 3 ++- modules/nf-core/modules/artic/minion/main.nf | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/modules.json b/modules.json index e0646ad4..97603779 100644 --- a/modules.json +++ b/modules.json @@ -7,10 +7,10 @@ "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" }, "artic/guppyplex": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" }, "artic/minion": { - "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde" }, "bandage/image": { "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" diff --git a/modules/nf-core/modules/artic/guppyplex/main.nf b/modules/nf-core/modules/artic/guppyplex/main.nf index 12f6f68c..2fd518e0 100644 --- a/modules/nf-core/modules/artic/guppyplex/main.nf +++ b/modules/nf-core/modules/artic/guppyplex/main.nf @@ -20,6 +20,7 @@ process ARTIC_GUPPYPLEX { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. """ artic \\ guppyplex \\ @@ -30,7 +31,7 @@ process ARTIC_GUPPYPLEX { pigz -p $task.cpus *.fastq cat <<-END_VERSIONS > versions.yml "${task.process}": - artic: \$(artic --version 2>&1 | sed 's/^.*artic //; s/ .*\$//') + artic: $VERSION END_VERSIONS """ } diff --git a/modules/nf-core/modules/artic/minion/main.nf b/modules/nf-core/modules/artic/minion/main.nf index 9d00ef6a..1629d433 100644 --- a/modules/nf-core/modules/artic/minion/main.nf +++ b/modules/nf-core/modules/artic/minion/main.nf @@ -48,6 +48,7 @@ process ARTIC_MINION { model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string" } def hd5_plugin_path = task.ext.hd5_plugin_path ? "export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin" + def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. 
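+    // NB: `artic --version` reports the wrong version inside this container (see the
+    // WARN above), so the hard-coded VERSION string is written into the versions.yml
+    // block below rather than being parsed from the CLI output.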
""" $hd5_plugin_path @@ -66,7 +67,7 @@ process ARTIC_MINION { cat <<-END_VERSIONS > versions.yml "${task.process}": - artic: \$(artic --version 2>&1 | sed 's/^.*artic //; s/ .*\$//') + artic: $VERSION END_VERSIONS """ } From 96c5d3e0af97aea24dbf1f1e383e88f590154e9c Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Tue, 12 Jul 2022 11:48:22 +0100 Subject: [PATCH 57/58] Update docs to update default Nextclade version being used --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index e89913f4..b8416d20 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -391,7 +391,7 @@ You can use a similar approach to update the version of Nextclade used by the pi ##### Nextclade datasets -A [`nextclade dataset`](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html#nextclade-datasets) feature was introduced in [Nextclade CLI v1.3.0](https://github.com/nextstrain/nextclade/releases/tag/1.3.0) that fetches input genome files such as reference sequences and trees from a central dataset repository. We have uploaded Nextclade dataset [v2022-01-18](https://github.com/nextstrain/nextclade_data/releases/tag/2022-01-24--21-27-29--UTC) to [nf-core/test-datasets](https://github.com/nf-core/test-datasets/blob/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-01-18T12_00_00Z.tar.gz?raw=true), and for reproducibility, this will be used by default if you specify `--genome 'MN908947.3'` when running the pipeline. However, there are a number of ways you can use a more recent version of the dataset: +A [`nextclade dataset`](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html#nextclade-datasets) feature was introduced in [Nextclade CLI v1.3.0](https://github.com/nextstrain/nextclade/releases/tag/1.3.0) that fetches input genome files such as reference sequences and trees from a central dataset repository. We have uploaded Nextclade dataset [v2022-06-14](https://github.com/nextstrain/nextclade_data/releases/tag/2022-06-16--16-03-24--UTC) to [nf-core/test-datasets](https://github.com/nf-core/test-datasets/blob/viralrecon/genome/MN908947.3/nextclade_sars-cov-2_MN908947_2022-06-14T12_00_00Z.tar.gz?raw=true), and for reproducibility, this will be used by default if you specify `--genome 'MN908947.3'` when running the pipeline. 
However, there are a number of ways you can use a more recent version of the dataset: - Supply your own by setting: `--nextclade_dataset ` - Let the pipeline create and use the latest version by setting: `--nextclade_dataset false --nextclade_dataset_tag false` From f7f8165c76fadd97b6c387d90266b08fe7144dd5 Mon Sep 17 00:00:00 2001 From: Harshil Patel Date: Wed, 13 Jul 2022 13:51:30 +0100 Subject: [PATCH 58/58] Fix #316 --- CHANGELOG.md | 1 + workflows/illumina.nf | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cc79312..a75796a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - [[#304](https://github.com/nf-core/viralrecon/pull/304)] - Re-factor code of `ivar_variants_to_vcf` script - [[#306](https://github.com/nf-core/viralrecon/issues/306)] - Add contig field information in vcf header in ivar_variants_to_vcf and use bcftools sort - [[#311](https://github.com/nf-core/viralrecon/issues/311)] - Invalid declaration val medaka_model_string +- [[#316](https://github.com/nf-core/viralrecon/issues/316)] - Variant calling isn't run when using --skip_asciigenome with metagenomic data - [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer - Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) diff --git a/workflows/illumina.nf b/workflows/illumina.nf index 1fa55548..f4010465 100644 --- a/workflows/illumina.nf +++ b/workflows/illumina.nf @@ -415,8 +415,8 @@ workflow ILLUMINA { VARIANTS_IVAR ( ch_bam, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.fai, - PREPARE_GENOME.out.chrom_sizes, + (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.fai : [], + (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [], PREPARE_GENOME.out.gff, (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], PREPARE_GENOME.out.snpeff_db, @@ -439,7 +439,7 @@ workflow ILLUMINA { VARIANTS_BCFTOOLS ( ch_bam, PREPARE_GENOME.out.fasta, - PREPARE_GENOME.out.chrom_sizes, + (params.protocol == 'amplicon' || !params.skip_asciigenome) ? PREPARE_GENOME.out.chrom_sizes : [], PREPARE_GENOME.out.gff, (params.protocol == 'amplicon' && params.primer_bed) ? PREPARE_GENOME.out.primer_bed : [], PREPARE_GENOME.out.snpeff_db,