diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..a9229959f --- /dev/null +++ b/.editorconfig @@ -0,0 +1,27 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +trim_trailing_whitespace = true +indent_size = 4 +indent_style = space + +[*.{md,yml,yaml,html,css,scss,js,cff}] +indent_size = 2 + +# These files are edited and tested upstream in nf-core/modules +[/modules/nf-core/**] +charset = unset +end_of_line = unset +insert_final_newline = unset +trim_trailing_whitespace = unset +indent_style = unset +indent_size = unset + +[/assets/email*] +indent_size = unset + +[/assets/blacklists/GRCh37-blacklist.bed] +trim_trailing_whitespace = unset diff --git a/.gitattributes b/.gitattributes index 7fe55006f..050bb1203 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,3 @@ *.config linguist-language=nextflow +modules/nf-core/** linguist-generated +subworkflows/nf-core/** linguist-generated diff --git a/.github/.dockstore.yml b/.github/.dockstore.yml new file mode 100644 index 000000000..191fabd22 --- /dev/null +++ b/.github/.dockstore.yml @@ -0,0 +1,6 @@ +# Dockstore config version, not pipeline version +version: 1.2 +workflows: + - subclass: nfl + primaryDescriptorPath: /nextflow.config + publish: True diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 484c13c31..2bfaa6814 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -15,11 +15,11 @@ Contributions to the code are even more welcome ;) If you'd like to write some code for nf-core/chipseq, the standard workflow is as follows: -1. Check that there isn't already an issue about your idea in the [nf-core/chipseq issues](https://github.com/nf-core/chipseq/issues) to avoid duplicating work - * If there isn't one already, please create one so that others know you're working on this +1. 
Check that there isn't already an issue about your idea in the [nf-core/chipseq issues](https://github.com/nf-core/chipseq/issues) to avoid duplicating work. If there isn't one already, please create one so that others know you're working on this 2. [Fork](https://help.github.com/en/github/getting-started-with-github/fork-a-repo) the [nf-core/chipseq repository](https://github.com/nf-core/chipseq) to your GitHub account -3. Make the necessary changes / additions within your forked repository -4. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged +3. Make the necessary changes / additions within your forked repository following [Pipeline conventions](#pipeline-contribution-conventions) +4. Use `nf-core schema build` and add any new parameters to the pipeline JSON schema (requires [nf-core tools](https://github.com/nf-core/tools) >= 1.10). +5. Submit a Pull Request against the `dev` branch and wait for the code to be reviewed and merged If you're not used to this workflow with git, you can start with some [docs from GitHub](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests) or even their [excellent `git` resources](https://try.github.io/). @@ -30,14 +30,14 @@ Typically, pull-requests are only fully reviewed when these tests are passing, t There are typically two types of tests that run: -### Lint Tests +### Lint tests `nf-core` has a [set of guidelines](https://nf-co.re/developers/guidelines) which all pipelines must adhere to. To enforce these and ensure that all pipelines stay in sync, we have developed a helper tool which runs checks on the pipeline code. This is in the [nf-core/tools repository](https://github.com/nf-core/tools) and once installed can be run locally with the `nf-core lint ` command. If any failures or warnings are encountered, please follow the listed URL for more documentation. 
-### Pipeline Tests +### Pipeline tests Each `nf-core` pipeline should be set up with a minimal set of test-data. `GitHub Actions` then runs the pipeline on this data to ensure that it exits successfully. @@ -46,12 +46,58 @@ These tests are run both with the latest available version of `Nextflow` and als ## Patch -: warning: Only in the unlikely and regretful event of a release happening with a bug. +:warning: Only in the unlikely and regretful event of a release happening with a bug. -* On your own fork, make a new branch `patch` based on `upstream/master`. -* Fix the bug, and bump version (X.Y.Z+1). -* A PR should be made on `master` from patch to directly this particular bug. +- On your own fork, make a new branch `patch` based on `upstream/master`. +- Fix the bug, and bump version (X.Y.Z+1). +- A PR should be made on `master` from patch to directly this particular bug. ## Getting help -For further information/help, please consult the [nf-core/chipseq documentation](https://nf-co.re/nf-core/chipseq/docs) and don't hesitate to get in touch on the nf-core Slack [#chipseq](https://nfcore.slack.com/channels/chipseq) channel ([join our Slack here](https://nf-co.re/join/slack)). +For further information/help, please consult the [nf-core/chipseq documentation](https://nf-co.re/chipseq/usage) and don't hesitate to get in touch on the nf-core Slack [#chipseq](https://nfcore.slack.com/channels/chipseq) channel ([join our Slack here](https://nf-co.re/join/slack)). + +## Pipeline contribution conventions + +To make the nf-core/chipseq code and processing logic more understandable for new contributors and to ensure quality, we semi-standardise the way the code and other contributions are written. + +### Adding a new step + +If you wish to contribute a new step, please use the following coding standards: + +1. Define the corresponding input channel into your new process from the expected previous process channel +2. Write the process block (see below). +3. 
Define the output channel if needed (see below). +4. Add any new parameters to `nextflow.config` with a default (see below). +5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core schema build` tool). +6. Add sanity checks and validation for all relevant parameters. +7. Perform local tests to validate that the new code works as expected. +8. If applicable, add a new test command in `.github/workflows/ci.yml`. +9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. +10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. + +### Default values + +Parameters should be initialised / defined with default values in `nextflow.config` under the `params` scope. + +Once there, use `nf-core schema build` to add to `nextflow_schema.json`. + +### Default processes resource requirements + +Sensible defaults for process resource requirements (CPUs / memory / time) for a process should be defined in `conf/base.config`. These should generally be specified generic with `withLabel:` selectors so they can be shared across multiple processes/steps of the pipeline. A nf-core standard set of labels that should be followed where possible can be seen in the [nf-core pipeline template](https://github.com/nf-core/tools/blob/master/nf_core/pipeline-template/conf/base.config), which has the default process as a single core-process, and then different levels of multi-core configurations for increasingly large memory requirements defined with standardised labels. + +The process resources can be passed on to the tool dynamically within the process with the `${task.cpus}` and `${task.memory}` variables in the `script:` block. + +### Naming schemes + +Please use the following naming schemes, to make it easy to understand what is going where.
+ +- initial process channel: `ch_output_from_<process>` +- intermediate and terminal channels: `ch_<previousprocess>_for_<nextprocess>` + +### Nextflow version bumping + +If you are using a new feature from core Nextflow, you may bump the minimum required version of nextflow in the pipeline with: `nf-core bump-version --nextflow . [min-nf-version]` + +### Images and figures + +For overview images and other documents we follow the nf-core [style guidelines and examples](https://nf-co.re/developers/design_guidelines). diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index f394683ed..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,42 +0,0 @@ -# nf-core/chipseq bug report - -Hi there! - -Thanks for telling us about a problem with the pipeline. -Please delete this text and anything that's not relevant from the template below: - -## Describe the bug - -A clear and concise description of what the bug is. - -## Steps to reproduce - -Steps to reproduce the behaviour: - -1. Command line: `nextflow run ...` -2. See error: _Please provide your error message_ - -## Expected behaviour - -A clear and concise description of what you expected to happen. - -## System - -- Hardware: -- Executor: -- OS: -- Version - -## Nextflow Installation - -- Version: - -## Container engine - -- Engine: -- version: -- Image tag: - -## Additional context - -Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 000000000..1d09e717d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,50 @@ +name: Bug report +description: Report something that is broken or incorrect +labels: bug +body: + - type: markdown + attributes: + value: | + Before you post this issue, please check the documentation: + + - [nf-core website: troubleshooting](https://nf-co.re/usage/troubleshooting) + - [nf-core/chipseq pipeline documentation](https://nf-co.re/chipseq/usage) + + - type: textarea + id: description + attributes: + label: Description of the bug + description: A clear and concise description of what the bug is. + validations: + required: true + + - type: textarea + id: command_used + attributes: + label: Command used and terminal output + description: Steps to reproduce the behaviour. Please paste the command you used to launch the pipeline and the output from your terminal. + render: console + placeholder: | + $ nextflow run ... + + Some output where something broke + + - type: textarea + id: files + attributes: + label: Relevant files + description: | + Please drag and drop the relevant files here. Create a `.zip` archive if the extension is not allowed. + Your verbose log file `.nextflow.log` is often useful _(this is a hidden file in the directory where you launched the pipeline)_ as well as custom Nextflow configuration files. + + - type: textarea + id: system + attributes: + label: System information + description: | + * Nextflow version _(eg. 21.10.3)_ + * Hardware _(eg. HPC, Desktop, Cloud)_ + * Executor _(eg. slurm, local, awsbatch)_ + * Container engine: _(e.g. Docker, Singularity, Conda, Podman, Shifter or Charliecloud)_ + * OS _(eg. CentOS Linux, macOS, Linux Mint)_ + * Version of nf-core/chipseq _(eg. 
1.1, 1.5, 1.8.2)_ diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml new file mode 100644 index 000000000..997624c21 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -0,0 +1,7 @@ +contact_links: + - name: Join nf-core + url: https://nf-co.re/join + about: Please join the nf-core community here + - name: "Slack #chipseq channel" + url: https://nfcore.slack.com/channels/chipseq + about: Discussion about the nf-core/chipseq pipeline diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md deleted file mode 100644 index c3bbf1942..000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ /dev/null @@ -1,24 +0,0 @@ -# nf-core/chipseq feature request - -Hi there! - -Thanks for suggesting a new feature for the pipeline! -Please delete this text and anything that's not relevant from the template below: - -## Is your feature request related to a problem? Please describe - -A clear and concise description of what the problem is. - -Ex. I'm always frustrated when [...] - -## Describe the solution you'd like - -A clear and concise description of what you want to happen. - -## Describe alternatives you've considered - -A clear and concise description of any alternative solutions or features you've considered. - -## Additional context - -Add any other context about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml new file mode 100644 index 000000000..d8c130a54 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yml @@ -0,0 +1,11 @@ +name: Feature request +description: Suggest an idea for the nf-core/chipseq pipeline +labels: enhancement +body: + - type: textarea + id: description + attributes: + label: Description of feature + description: Please describe your suggestion for a new feature. It might help to describe a problem or use case, plus any alternatives that you have considered. 
+ validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index aed991c4b..8da27af0e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,3 +1,4 @@ + + ## PR checklist -- [ ] This comment contains a description of changes (with reason) +- [ ] This comment contains a description of changes (with reason). - [ ] If you've fixed a bug or added code that should be tested, add tests! -- [ ] If necessary, also make a PR on the [nf-core/chipseq branch on the nf-core/test-datasets repo](https://github.com/nf-core/test-datasets/pull/new/nf-core/chipseq) -- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker`). -- [ ] Make sure your code lints (`nf-core lint .`). -- [ ] Documentation in `docs` is updated -- [ ] `CHANGELOG.md` is updated -- [ ] `README.md` is updated - -**Learn more about contributing:** [CONTRIBUTING.md](https://github.com/nf-core/chipseq/tree/master/.github/CONTRIBUTING.md) +- [ ] If you've added a new tool - have you followed the pipeline conventions in the [contribution docs](https://github.com/nf-core/chipseq/tree/master/.github/CONTRIBUTING.md)- [ ] If necessary, also make a PR on the nf-core/chipseq _branch_ on the [nf-core/test-datasets](https://github.com/nf-core/test-datasets) repository. +- [ ] Make sure your code lints (`nf-core lint`). +- [ ] Ensure the test suite passes (`nextflow run . -profile test,docker --outdir `). +- [ ] Usage Documentation in `docs/usage.md` is updated. +- [ ] Output Documentation in `docs/output.md` is updated. +- [ ] `CHANGELOG.md` is updated. +- [ ] `README.md` is updated (including new tool citations and authors/contributors). 
diff --git a/.github/markdownlint.yml b/.github/markdownlint.yml deleted file mode 100644 index cc8e0d331..000000000 --- a/.github/markdownlint.yml +++ /dev/null @@ -1,9 +0,0 @@ -# Markdownlint configuration file -default: true, -line-length: false -no-duplicate-header: - siblings_only: true -MD033: - allowed_elements: [details, summary, p, img] -MD007: - indent: 4 diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 6d9ce127a..5a1c3622d 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -1,36 +1,31 @@ -name: nf-core AWS full size tests -# This workflow is triggered on releases. -# It runs the -profile 'test_full' on AWS batch - -on: - release: - types: [published] - -jobs: - run-awstest: - if: github.repository == 'nf-core/chipseq' - name: Run AWS test - runs-on: ubuntu-latest - steps: - - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 - with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - env: - AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} - AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} - TOWER_ACCESS_TOKEN: ${{secrets.AWS_TOWER_TOKEN}} - #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} - AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} - AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} - run: | # Submits job to AWS batch using a 'nextflow-4GiB' job definition. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. 
- aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-chipseq \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition nextflow-4GiB \ - --container-overrides '{"command": ["nf-core/chipseq", "-r '"${GITHUB_SHA}"' -profile test_full --outdir s3://'"${AWS_S3_BUCKET}"'/chipseq/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/chipseq/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' +name: nf-core AWS full size tests +# This workflow is triggered on published releases. +# It can be additionally triggered manually with GitHub actions workflow dispatch button. +# It runs the -profile 'test_full' on AWS batch + +on: + release: + types: [published] + workflow_dispatch: +jobs: + run-tower: + name: Run AWS full tests + if: github.repository == 'nf-core/chipseq' + runs-on: ubuntu-latest + strategy: + matrix: + aligner: ["bwa", "bowtie2", "chromap", "star"] + steps: + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 + with: + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/chipseq/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-${{ github.sha }}", + "aligner": "${{ matrix.aligner }}" + } + profiles: test_full,aws_tower diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index aeccb336e..73640cc6f 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -1,38 +1,25 @@ name: nf-core AWS test -# This workflow is triggered on push to the master branch. +# This workflow can be triggered manually with the GitHub actions workflow dispatch button.
# It runs the -profile 'test' on AWS batch on: - push: - branches: - - master - - dev # just for testing purposes, to be removed - + workflow_dispatch: jobs: - run-awstest: + run-tower: + name: Run AWS tests if: github.repository == 'nf-core/chipseq' - name: Run AWS test runs-on: ubuntu-latest steps: - - name: Setup Miniconda - uses: goanpeca/setup-miniconda@v1.0.2 + # Launch workflow using Tower CLI tool action + - name: Launch workflow via tower + uses: nf-core/tower-action@v3 with: - auto-update-conda: true - python-version: 3.7 - - name: Install awscli - run: conda install -c conda-forge awscli - - name: Start AWS batch job - env: - AWS_ACCESS_KEY_ID: ${{secrets.AWS_ACCESS_KEY_ID}} - AWS_SECRET_ACCESS_KEY: ${{secrets.AWS_SECRET_ACCESS_KEY}} - TOWER_ACCESS_TOKEN: ${{secrets.AWS_TOWER_TOKEN}} - #AWS_JOB_DEFINITION: ${{secrets.AWS_JOB_DEFINITION}} - AWS_JOB_QUEUE: ${{secrets.AWS_JOB_QUEUE}} - AWS_S3_BUCKET: ${{secrets.AWS_S3_BUCKET}} - run: | # Submits job to AWS batch using a 'nextflow-4GiB' job definition. Setting JVM options to "-XX:+UseG1GC" for more efficient garbage collection when staging remote files. 
- aws batch submit-job \ - --region eu-west-1 \ - --job-name nf-core-chipseq \ - --job-queue $AWS_JOB_QUEUE \ - --job-definition nextflow-4GiB \ - --container-overrides '{"command": ["nf-core/chipseq", "-r '"${GITHUB_SHA}"' -profile test --outdir s3://'"${AWS_S3_BUCKET}"'/chipseq/results-'"${GITHUB_SHA}"' -w s3://'"${AWS_S3_BUCKET}"'/chipseq/work-'"${GITHUB_SHA}"' -with-tower"], "environment": [{"name": "TOWER_ACCESS_TOKEN", "value": "'"$TOWER_ACCESS_TOKEN"'"}, {"name": "NXF_OPTS", "value": "-XX:+UseG1GC"}]}' + workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} + access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} + compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/chipseq/work-${{ github.sha }} + parameters: | + { + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/chipseq/results-test-${{ github.sha }}" + } + profiles: test,aws_tower diff --git a/.github/workflows/branch.yml b/.github/workflows/branch.yml index aa94952dc..1e03d409b 100644 --- a/.github/workflows/branch.yml +++ b/.github/workflows/branch.yml @@ -2,15 +2,43 @@ name: nf-core branch protection # This workflow is triggered on PRs to master branch on the repository # It fails when someone tries to make a PR against the nf-core `master` branch instead of `dev` on: - pull_request: - branches: - - master + pull_request_target: + branches: [master] jobs: test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-latest steps: - # PRs are only ok if coming from an nf-core `dev` branch or a fork `patch` branch + # PRs to the nf-core repo master branch are only ok if coming from the nf-core repo `dev` or any `patch` branches - name: Check PRs + if: github.repository == 'nf-core/chipseq' run: | - { [[ $(git remote get-url origin) == *nf-core/chipseq ]] && [[ ${GITHUB_HEAD_REF} = "dev" ]]; } || [[ ${GITHUB_HEAD_REF} == "patch" ]] + { [[ ${{github.event.pull_request.head.repo.full_name }} == nf-core/chipseq ]] && [[ $GITHUB_HEAD_REF = "dev" ]]; } || [[ $GITHUB_HEAD_REF == "patch" ]] + + # 
If the above check failed, post a comment on the PR explaining the failure + # NOTE - this doesn't currently work if the PR is coming from a fork, due to limitations in GitHub actions secrets + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 + with: + message: | + ## This PR is against the `master` branch :x: + + * Do not close this PR + * Click _Edit_ and change the `base` to `dev` + * This CI test will remain failed until you push a new commit + + --- + + Hi @${{ github.event.pull_request.user.login }}, + + It looks like this pull-request has been made against the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `master` branch. + The `master` branch on nf-core repositories should always contain code from the latest release. + Because of this, PRs to `master` are only allowed if they come from the [${{github.event.pull_request.head.repo.full_name }}](https://github.com/${{github.event.pull_request.head.repo.full_name }}) `dev` branch. + + You do not need to close this PR, you can change the target branch to `dev` by clicking the _"Edit"_ button at the top of this page. + Note that even after this, the test will continue to show as failing until you push a new commit. + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e00789bce..520b9d2bd 100755 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,5 @@ name: nf-core CI -# This workflow is triggered on releases and pull-requests.
-# It runs the pipeline with the minimal test dataset to check that it completes without any syntax errors +# This workflow runs the pipeline with the minimal test dataset to check that it completes without any syntax errors on: push: branches: @@ -9,80 +8,46 @@ on: release: types: [published] +env: + NXF_ANSI_LOG: false + jobs: test: - name: Run workflow tests + name: Run pipeline with test data # Only run on push if this is the nf-core dev branch (merged PRs) - if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }} + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }}" runs-on: ubuntu-latest - env: - NXF_VER: ${{ matrix.nxf_ver }} - NXF_ANSI_LOG: false strategy: matrix: - # Nextflow versions: check pipeline minimum and current latest - nxf_ver: ['19.10.0', ''] + NXF_VER: + - "21.10.3" + - "latest-everything" steps: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v1 - with: - PREFIX_FILTER: | - Dockerfile - environment.yml - - - name: Build new docker image - if: env.GIT_DIFF - run: docker build --no-cache . 
-t nfcore/chipseq:1.2.2 - - - name: Pull docker image - if: ${{ !env.GIT_DIFF }} - run: | - docker pull nfcore/chipseq:dev - docker tag nfcore/chipseq:dev nfcore/chipseq:1.2.2 - - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 + with: + version: "${{ matrix.NXF_VER }}" - name: Run pipeline with test data run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results parameters: name: Test workflow parameters if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }} runs-on: ubuntu-latest - env: - NXF_VER: '19.10.0' - NXF_ANSI_LOG: false strategy: matrix: - parameters: [--single_end, --skip_trimming, --skip_consensus_peaks] + parameters: + - "--skip_trimming" + - "--skip_consensus_peaks" steps: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Check if Dockerfile or Conda environment changed - uses: technote-space/get-diff-action@v1 - with: - PREFIX_FILTER: | - Dockerfile - environment.yml - - - name: Build new docker image - if: env.GIT_DIFF - run: docker build --no-cache . 
-t nfcore/chipseq:1.2.2 - - - name: Pull docker image - if: ${{ !env.GIT_DIFF }} - run: | - docker pull nfcore/chipseq:dev - docker tag nfcore/chipseq:dev nfcore/chipseq:1.2.2 - - name: Install Nextflow run: | wget -qO- get.nextflow.io | bash @@ -90,36 +55,27 @@ jobs: - name: Run pipeline with various parameters run: | - nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} + nextflow run ${GITHUB_WORKSPACE} -profile test,docker ${{ matrix.parameters }} --outdir ./results - push_dockerhub: - name: Push new Docker image to Docker Hub + aligners: + name: Test available aligners + if: ${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/chipseq') }} runs-on: ubuntu-latest - # Only run if the tests passed - needs: test - # Only run for the nf-core repo, for releases and merged PRs - if: ${{ github.repository == 'nf-core/chipseq' && (github.event_name == 'release' || github.event_name == 'push') }} - env: - DOCKERHUB_USERNAME: ${{ secrets.DOCKERHUB_USERNAME }} - DOCKERHUB_PASS: ${{ secrets.DOCKERHUB_PASS }} + strategy: + matrix: + aligner: + - "bowtie2" + - "chromap" + - "star" steps: - name: Check out pipeline code uses: actions/checkout@v2 - - name: Build new docker image - run: docker build --no-cache . 
-t nfcore/chipseq:latest - - - name: Push Docker image to DockerHub (dev) - if: ${{ github.event_name == 'push' }} + - name: Install Nextflow run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker tag nfcore/chipseq:latest nfcore/chipseq:dev - docker push nfcore/chipseq:dev + wget -qO- get.nextflow.io | bash + sudo mv nextflow /usr/local/bin/ - - name: Push Docker image to DockerHub (release) - if: ${{ github.event_name == 'release' }} + - name: Run pipeline with the different aligners available run: | - echo "$DOCKERHUB_PASS" | docker login -u "$DOCKERHUB_USERNAME" --password-stdin - docker push nfcore/chipseq:latest - docker tag nfcore/chipseq:latest nfcore/chipseq:${{ github.event.release.tag_name }} - docker push nfcore/chipseq:${{ github.event.release.tag_name }} + nextflow run ${GITHUB_WORKSPACE} -profile test,docker --aligner ${{ matrix.aligner }} --outdir ./results diff --git a/.github/workflows/fix-linting.yml b/.github/workflows/fix-linting.yml new file mode 100644 index 000000000..b4f90734c --- /dev/null +++ b/.github/workflows/fix-linting.yml @@ -0,0 +1,55 @@ +name: Fix linting from a comment +on: + issue_comment: + types: [created] + +jobs: + deploy: + # Only run if comment is on a PR with the main repo, and if it contains the magic keywords + if: > + contains(github.event.comment.html_url, '/pull/') && + contains(github.event.comment.body, '@nf-core-bot fix linting') && + github.repository == 'nf-core/chipseq' + runs-on: ubuntu-latest + steps: + # Use the @nf-core-bot token to check out so we can push later + - uses: actions/checkout@v3 + with: + token: ${{ secrets.nf_core_bot_auth_token }} + + # Action runs on the issue comment, so we don't get the PR by default + # Use the gh cli to check out the PR + - name: Checkout Pull Request + run: gh pr checkout ${{ github.event.issue.number }} + env: + GITHUB_TOKEN: ${{ secrets.nf_core_bot_auth_token }} + + - uses: actions/setup-node@v2 + + - name: Install Prettier 
+ run: npm install -g prettier @prettier/plugin-php + + # Check that we actually need to fix something + - name: Run 'prettier --check' + id: prettier_status + run: | + if prettier --check ${GITHUB_WORKSPACE}; then + echo "::set-output name=result::pass" + else + echo "::set-output name=result::fail" + fi + + - name: Run 'prettier --write' + if: steps.prettier_status.outputs.result == 'fail' + run: prettier --write ${GITHUB_WORKSPACE} + + - name: Commit & push changes + if: steps.prettier_status.outputs.result == 'fail' + run: | + git config user.email "core@nf-co.re" + git config user.name "nf-core-bot" + git config push.default upstream + git add . + git status + git commit -m "[automated] Fix linting with Prettier" + git push diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index eb66c1449..8a5ce69b8 100755 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -1,6 +1,7 @@ name: nf-core linting # This workflow is triggered on pushes and PRs to the repository. -# It runs the `nf-core lint` and markdown lint tests to ensure that the code meets the nf-core guidelines +# It runs the `nf-core lint` and markdown lint tests to ensure +# that the code meets the nf-core guidelines. 
on: push: pull_request: @@ -8,44 +9,75 @@ on: types: [published] jobs: - Markdown: + EditorConfig: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: actions/setup-node@v1 - with: - node-version: '10' - - name: Install markdownlint - run: npm install -g markdownlint-cli - - name: Run Markdownlint - run: markdownlint ${GITHUB_WORKSPACE} -c ${GITHUB_WORKSPACE}/.github/markdownlint.yml - YAML: + + - uses: actions/setup-node@v2 + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: Run ECLint check + run: editorconfig-checker -exclude README.md $(find .* -type f | grep -v '.git\|.py\|.md\|json\|yml\|yaml\|html\|css\|work\|.nextflow\|build\|nf_core.egg-info\|log.txt\|Makefile') + + Prettier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v1 - - uses: actions/setup-node@v1 + - uses: actions/checkout@v2 + + - uses: actions/setup-node@v2 + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check ${GITHUB_WORKSPACE} + + PythonBlack: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Check code lints with Black + uses: psf/black@stable + + # If the above check failed, post a comment on the PR explaining the failure + - name: Post PR comment + if: failure() + uses: mshick/add-pr-comment@v1 with: - node-version: '10' - - name: Install yaml-lint - run: npm install -g yaml-lint - - name: Run yaml-lint - run: yamllint $(find ${GITHUB_WORKSPACE} -type f -name "*.yml") + message: | + ## Python linting (`black`) is failing + + To keep the code consistent with lots of contributors, we run automated code consistency checks. 
+ To fix this CI test, please run: + + * Install [`black`](https://black.readthedocs.io/en/stable/): `pip install black` + * Fix formatting errors in your pipeline: `black .` + + Once you push these changes the test should pass, and you can hide this comment :+1: + + We highly recommend setting up Black in your code editor so that this formatting is done automatically on save. Ask about it on Slack for help! + + Thanks again for your contribution! + repo-token: ${{ secrets.GITHUB_TOKEN }} + allow-repeats: false + nf-core: runs-on: ubuntu-latest steps: - - name: Check out pipeline code uses: actions/checkout@v2 - name: Install Nextflow - run: | - wget -qO- get.nextflow.io | bash - sudo mv nextflow /usr/local/bin/ + uses: nf-core/setup-nextflow@v1 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v3 with: - python-version: '3.6' - architecture: 'x64' + python-version: "3.7" + architecture: "x64" - name: Install dependencies run: | @@ -57,5 +89,18 @@ jobs: GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} - run: nf-core lint ${GITHUB_WORKSPACE} + run: nf-core -l lint_log.txt lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@v2 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml new file mode 100644 index 000000000..04758f61e --- /dev/null +++ b/.github/workflows/linting_comment.yml @@ -0,0 +1,28 @@ +name: nf-core linting comment +# This workflow is triggered after the linting action is complete +# It posts an automated comment to the PR, even if the PR is coming from a fork + +on: + 
workflow_run: + workflows: ["nf-core linting"] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Download lint results + uses: dawidd6/action-download-artifact@v2 + with: + workflow: linting.yml + workflow_conclusion: completed + + - name: Get PR number + id: pr_number + run: echo "::set-output name=pr_number::$(cat linting-logs/PR_number.txt)" + + - name: Post PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + number: ${{ steps.pr_number.outputs.pr_number }} + path: linting-logs/lint_results.md diff --git a/.gitignore b/.gitignore index 6354f3708..5124c9ac7 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,6 @@ work/ data/ results/ .DS_Store -tests/ testing/ +testing* *.pyc diff --git a/.gitpod.yml b/.gitpod.yml new file mode 100644 index 000000000..85d95ecc8 --- /dev/null +++ b/.gitpod.yml @@ -0,0 +1,14 @@ +image: nfcore/gitpod:latest + +vscode: + extensions: # based on nf-core.nf-core-extensionpack + - codezombiech.gitignore # Language support for .gitignore files + # - cssho.vscode-svgviewer # SVG viewer + - esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code + - eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed + - EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files + - Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar + - mechatroner.rainbow-csv # Highlight columns in csv files in different colors + # - nextflow.nextflow # Nextflow syntax highlighting + - oderwat.indent-rainbow # Highlight indentation level + - streetsidesoftware.code-spell-checker # Spelling checker for source code diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 000000000..3805dc81c --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1 @@ +repository_type: pipeline diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 
000000000..d0e7ae589 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,9 @@ +email_template.html +.nextflow* +work/ +data/ +results/ +.DS_Store +testing/ +testing* +*.pyc diff --git a/.prettierrc.yml b/.prettierrc.yml new file mode 100644 index 000000000..c81f9a766 --- /dev/null +++ b/.prettierrc.yml @@ -0,0 +1 @@ +printWidth: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a66ef72c..ea9b46068 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,128 +1,212 @@ # nf-core/chipseq: Changelog -The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) -and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). - -## [1.2.2] - 2021-04-22 - -* [#206](https://github.com/nf-core/chipseq/issues/206) - Minor patch release to fix Conda environment +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [[2.0.0](https://github.com/nf-core/chipseq/releases/tag/2.0.0)] - 2022-10-03 + +### Enhancements & fixes + +- Pipeline has been re-implemented in [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) +- All software containers are now exclusively obtained from [Biocontainers](https://biocontainers.pro/#/registry) +- Updated pipeline template to [nf-core/tools 2.5.1](https://github.com/nf-core/tools/releases/tag/2.5.1) +- [[#128](https://github.com/nf-core/chipseq/issues/128)] - Filter files with no peaks to avoid errors in downstream processes +- [[#220](https://github.com/nf-core/chipseq/issues/220)] - Fix `phantompeakqualtools` protection stack overflow error +- [[#233](https://github.com/nf-core/chipseq/issues/233)] - Add `chromap` to the available aligners +- Bump minimum Nextflow version from `21.04.0` -> `21.10.3` +- Added `python3` shebang to appropriate scripts in `bin/` directory +- [[#160](https://github.com/nf-core/chipseq/issues/160)] - Add `bowtie2` and `star` as available aligners, via the 
`--aligner` parameter +- Add `--save_unaligned` parameter (only available for `bowtie2` and `star`) +- Update `igenomes.config` to fetch whole `BWAIndex/version0.6.0/` folder +- [[#228](https://github.com/nf-core/chipseq/issues/228)] - Update blacklist bed files. +- [nf-core/tools#1415](https://github.com/nf-core/tools/issues/1415) - Make `--outdir` a mandatory parameter +- [[#282](https://github.com/nf-core/chipseq/issues/282)] - Fix `genome.fa` publication for IGV. +- [[#280](https://github.com/nf-core/chipseq/issues/280)] - Update `macs_gsize` in `igenomes.config`, create a new `--read_length` parameter and implement the logic to calculate `--macs_gsize` when the parameter is missing +- Eliminate `if` conditions from `deseq2_qc` and `macs2_consensus` (local module and use `ext.when` instead) +- Remove `deseq2` differential binding analysis of consensus peaks. +- [[#291](https://github.com/nf-core/chipseq/issues/291)] - Filter paired-end files produced by `chromap` since the resulting `BAM` files cannot be processed downstream. +- Add bytesize link to readme. + +### Parameters + +| Old parameter | New parameter | +| ---------------------- | ----------------------- | +| `--conda` | `--enable_conda` | +| `--skip_diff_analysis` | `--skip_deseq2_qc` | +| | `--skip_qc` | +| | `--aligner` | +| | `--save_unaligned` | +| | `--read_length` | +| | `--multiqc_title` | +| | `--gff` | +| | `--bowtie2_index` | +| | `--chromap_index` | +| | `--star_index` | +| | `--validate_params` | +| | `--show_hidden_params` | +| | `--config_profile_name` | +| `--clusterOptions` | | +| `--single_end` | | +| `--name` | | +| `--hostnames` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present.
+ +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ----------------------- | ----------- | ----------- | +| `samtools` | 1.10 | 1.15.1 | +| `picard` | 2.23.1 | 2.27.4 | +| `bamtools` | 2.5.1 | 2.5.2 | +| `pysam` | 0.15.3 | 0.19.0 | +| `bedtools` | 2.29.2 | 2.30.0 | +| `ucsc-bedgraphtobigwig` | 357 | 377 | +| `deeptools` | 3.4.3 | 3.5.1 | +| `pigz` | 2.3.4 | 2.6 | +| `preseq` | 2.0.3 | 3.1.2 | +| `multiqc` | 1.9 | 1.13a | +| `r-base` | 3.6.1 | 4.0.3 | +| `r-ggplot2` | 3.3.2 | 3.3.3 | +| `bioconductor-deseq2` | 1.26.0 | 1.28.0 | +| `trim-galore` | 0.6.5 | 0.6.7 | +| `r-optparse` | - | 1.7.1 | +| `chromap` | - | 0.2.1 | +| `bowtie2` | - | 2.4.4 | +| `star` | - | 2.6.1d | +| `r-tidyr` | - | - | +| `r-lattice` | - | - | +| `r-xfun` | - | - | +| `bioconductor-vsn` | - | - | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> **NB:** Dependency has been **added** if just the new version information is present. +> **NB:** Dependency has been **removed** if version information isn't present. 
+ +## [[1.2.2](https://github.com/nf-core/chipseq/releases/tag/1.2.2)] - 2021-04-22 + +- [#206](https://github.com/nf-core/chipseq/issues/206) - Minor patch release to fix Conda environment ### `Dependencies` -* Update r-base `3.6.2` -> `3.6.3` -* Update r-xfun `0.15` -> `0.20` +- Update r-base `3.6.2` -> `3.6.3` +- Update r-xfun `0.15` -> `0.20` -## [1.2.1] - 2020-07-29 +## [[1.2.1](https://github.com/nf-core/chipseq/releases/tag/1.2.1)] - 2020-07-29 -* [#171](https://github.com/nf-core/chipseq/issues/171) - Minor patch release to update pipeline schema +- [#171](https://github.com/nf-core/chipseq/issues/171) - Minor patch release to update pipeline schema -## [1.2.0] - 2020-07-02 +## [[1.2.0](https://github.com/nf-core/chipseq/releases/tag/1.2.0)] - 2020-07-02 ### `Added` -* [#138](https://github.com/nf-core/chipseq/issues/138) - Add social preview image -* [#153](https://github.com/nf-core/chipseq/issues/153) - Add plotHeatmap -* [#159](https://github.com/nf-core/chipseq/issues/159) - expose bwa mem -T parameter -* [nf-core/atacseq#63](https://github.com/nf-core/atacseq/issues/63) - Added multicore support for Trim Galore! -* [nf-core/atacseq#75](https://github.com/nf-core/atacseq/issues/75) - Include gene annotation versions in multiqc report -* [nf-core/atacseq#76](https://github.com/nf-core/atacseq/issues/76) - featureCounts coupled to DESeq2 -* [nf-core/atacseq#79](https://github.com/nf-core/atacseq/issues/79) - Parallelize DESeq2 -* [nf-core/atacseq#97](https://github.com/nf-core/atacseq/issues/97) - PBC1, PBC2 from pipeline? 
-* [nf-core/atacseq#107](https://github.com/nf-core/atacseq/issues/107) - Add options to change MACS2 parameters -* Regenerated screenshots and added collapsible sections for output files in `docs/output.md` -* Update template to tools `1.9` -* Replace `set` with `tuple` and `file()` with `path()` in all processes -* Capitalise process names -* Parameters: - * `--bwa_min_score` to set minimum alignment score for BWA MEM - * `--macs_fdr` to provide FDR threshold for MACS2 peak calling - * `--macs_pvalue` to provide p-value threshold for MACS2 peak calling - * `--skip_peak_qc` to skip MACS2 peak QC plot generation - * `--skip_peak_annotation` to skip annotation of MACS2 and consensus peaks with HOMER - * `--skip_consensus_peaks` to skip consensus peak generation - * `--deseq2_vst` to use variance stabilizing transformation (VST) instead of regularized log transformation (rlog) with DESeq2 - * `--publish_dir_mode` to customise method of publishing results to output directory [nf-core/tools#585](https://github.com/nf-core/tools/issues/585) +- [#138](https://github.com/nf-core/chipseq/issues/138) - Add social preview image +- [#153](https://github.com/nf-core/chipseq/issues/153) - Add plotHeatmap +- [#159](https://github.com/nf-core/chipseq/issues/159) - expose bwa mem -T parameter +- [nf-core/atacseq#63](https://github.com/nf-core/atacseq/issues/63) - Added multicore support for Trim Galore! +- [nf-core/atacseq#75](https://github.com/nf-core/atacseq/issues/75) - Include gene annotation versions in multiqc report +- [nf-core/atacseq#76](https://github.com/nf-core/atacseq/issues/76) - featureCounts coupled to DESeq2 +- [nf-core/atacseq#79](https://github.com/nf-core/atacseq/issues/79) - Parallelize DESeq2 +- [nf-core/atacseq#97](https://github.com/nf-core/atacseq/issues/97) - PBC1, PBC2 from pipeline? 
+- [nf-core/atacseq#107](https://github.com/nf-core/atacseq/issues/107) - Add options to change MACS2 parameters +- Regenerated screenshots and added collapsible sections for output files in `docs/output.md` +- Update template to tools `1.9` +- Replace `set` with `tuple` and `file()` with `path()` in all processes +- Capitalise process names +- Parameters: + - `--bwa_min_score` to set minimum alignment score for BWA MEM + - `--macs_fdr` to provide FDR threshold for MACS2 peak calling + - `--macs_pvalue` to provide p-value threshold for MACS2 peak calling + - `--skip_peak_qc` to skip MACS2 peak QC plot generation + - `--skip_peak_annotation` to skip annotation of MACS2 and consensus peaks with HOMER + - `--skip_consensus_peaks` to skip consensus peak generation + - `--deseq2_vst` to use variance stabilizing transformation (VST) instead of regularized log transformation (rlog) with DESeq2 + - `--publish_dir_mode` to customise method of publishing results to output directory [nf-core/tools#585](https://github.com/nf-core/tools/issues/585) ### `Removed` -* `--tss_bed` parameter +- `--tss_bed` parameter ### `Fixed` -* [#118](https://github.com/nf-core/chipseq/issues/118) - Running on with SGE -* [#132](https://github.com/nf-core/chipseq/issues/132) - BigWig Error: sort: cannot create temporary file in '': Read-only file system -* [#154](https://github.com/nf-core/chipseq/issues/154) - computeMatrix.val.mat.gz files not zipped -* [nf-core/atacseq#71](https://github.com/nf-core/atacseq/issues/71) - consensus_peaks.mLb.clN.boolean.intersect.plot.pdf not generated -* [nf-core/atacseq#73](https://github.com/nf-core/atacseq/issues/73) - macs_annotatePeaks.mLb.clN.summary.txt file is not created -* [nf-core/atacseq#86](https://github.com/nf-core/atacseq/issues/86) - bug in the plot_homer_annotatepeaks.r script -* [nf-core/atacseq#102](https://github.com/nf-core/atacseq/issues/102) - Incorrect Group ID assigned by featurecounts_deseq2.r -* 
[nf-core/atacseq#109](https://github.com/nf-core/atacseq/issues/109) - Specify custom gtf but gene bed is not generated from that gtf? -* Make executables in `bin/` compatible with Python 3 +- [#118](https://github.com/nf-core/chipseq/issues/118) - Running on with SGE +- [#132](https://github.com/nf-core/chipseq/issues/132) - BigWig Error: sort: cannot create temporary file in '': Read-only file system +- [#154](https://github.com/nf-core/chipseq/issues/154) - computeMatrix.val.mat.gz files not zipped +- [nf-core/atacseq#71](https://github.com/nf-core/atacseq/issues/71) - consensus_peaks.mLb.clN.boolean.intersect.plot.pdf not generated +- [nf-core/atacseq#73](https://github.com/nf-core/atacseq/issues/73) - macs_annotatePeaks.mLb.clN.summary.txt file is not created +- [nf-core/atacseq#86](https://github.com/nf-core/atacseq/issues/86) - bug in the plot_homer_annotatepeaks.r script +- [nf-core/atacseq#102](https://github.com/nf-core/atacseq/issues/102) - Incorrect Group ID assigned by featurecounts_deseq2.r +- [nf-core/atacseq#109](https://github.com/nf-core/atacseq/issues/109) - Specify custom gtf but gene bed is not generated from that gtf? 
+- Make executables in `bin/` compatible with Python 3 ### `Dependencies` -* Add bioconductor-biocparallel `1.20.0` -* Add markdown `3.2.2` -* Add pigz `2.3.4` -* Add pygments `2.6.1` -* Add pymdown-extensions `7.1` -* Add python `3.7.6` -* Add r-reshape2 `1.4.4` -* Add r-tidyr `1.1.0` -* Update bedtools `2.27.1` -> `2.29.2` -* Update bioconductor-deseq2 `1.20.0` -> `1.26.0` -* Update bioconductor-vsn `3.46.0` -> `3.54.0` -* Update deeptools `3.2.1` -> `3.4.3` -* Update fastqc `0.11.8` -> `0.11.9` -* Update gawk `4.2.1` -> `5.1.0` -* Update homer `4.9.1` -> `4.11` -* Update macs2 `2.1.2` -> `2.2.7.1` -* Update multiqc `1.7` -> `1.8` -* Update phantompeakqualtools `1.2` -> `1.2.2` -* Update picard `2.19.0` -> `2.23.1` -* Update pysam `0.15.2` -> `0.15.3` -* Update r-base `3.4.1` -> `3.6.2` -* Update r-ggplot2 `3.1.0` -> `3.3.2` -* Update r-lattice `0.20_35` -> `0.20_41` -* Update r-optparse `1.6.0` -> `1.6.6` -* Update r-pheatmap `1.0.10` -> `1.0.12` -* Update r-scales `1.0.0` -> `1.1.1` -* Update r-upsetr `1.3.3` -> `1.4.0` -* Update r-xfun `0.3` -> `0.15` -* Update samtools `1.9` -> `1.10` -* Update subread `1.6.4` -> `2.0.1` -* Update trim-galore `0.5.0` -> `0.6.5` -* Update ucsc-bedgraphtobigwig `377` -> `357` - -## [1.1.0] - 2019-11-05 +- Add bioconductor-biocparallel `1.20.0` +- Add markdown `3.2.2` +- Add pigz `2.3.4` +- Add pygments `2.6.1` +- Add pymdown-extensions `7.1` +- Add python `3.7.6` +- Add r-reshape2 `1.4.4` +- Add r-tidyr `1.1.0` +- Update bedtools `2.27.1` -> `2.29.2` +- Update bioconductor-deseq2 `1.20.0` -> `1.26.0` +- Update bioconductor-vsn `3.46.0` -> `3.54.0` +- Update deeptools `3.2.1` -> `3.4.3` +- Update fastqc `0.11.8` -> `0.11.9` +- Update gawk `4.2.1` -> `5.1.0` +- Update homer `4.9.1` -> `4.11` +- Update macs2 `2.1.2` -> `2.2.7.1` +- Update multiqc `1.7` -> `1.8` +- Update phantompeakqualtools `1.2` -> `1.2.2` +- Update picard `2.19.0` -> `2.23.1` +- Update pysam `0.15.2` -> `0.15.3` +- Update r-base `3.4.1` -> `3.6.2` +- Update 
r-ggplot2 `3.1.0` -> `3.3.2` +- Update r-lattice `0.20_35` -> `0.20_41` +- Update r-optparse `1.6.0` -> `1.6.6` +- Update r-pheatmap `1.0.10` -> `1.0.12` +- Update r-scales `1.0.0` -> `1.1.1` +- Update r-upsetr `1.3.3` -> `1.4.0` +- Update r-xfun `0.3` -> `0.15` +- Update samtools `1.9` -> `1.10` +- Update subread `1.6.4` -> `2.0.1` +- Update trim-galore `0.5.0` -> `0.6.5` +- Update ucsc-bedgraphtobigwig `377` -> `357` + +## [[1.1.0](https://github.com/nf-core/chipseq/releases/tag/1.1.0)] - 2019-11-05 ### `Added` -* [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config -* Update template to tools `1.7` -* Add `--trim_nextseq` parameter -* Add `CITATIONS.md` file -* Capitalised process names +- [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Missing gene_bed path in igenomes config +- Update template to tools `1.7` +- Add `--trim_nextseq` parameter +- Add `CITATIONS.md` file +- Capitalised process names ### `Fixed` -* **Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated))** -* [nf-core/atacseq#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 -* [nf-core/atacseq#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? 
-* [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly -* [nf-core/atacseq#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks -* Fixed bug in UpSetR peak intersection plot -* Increase default resource requirements in `base.config` -* Increase process-specific requirements based on user-reported failures +- Change all parameters from `camelCase` to `snake_case` (see [Deprecated](#Deprecated)) +- [nf-core/atacseq#44](https://github.com/nf-core/atacseq/issues/44) - Output directory missing: macs2/consensus/deseq2 +- [nf-core/atacseq#45](https://github.com/nf-core/atacseq/issues/45) - Wrong x-axis scale for the HOMER: Peak annotation Counts tab plot? +- [nf-core/atacseq#46](https://github.com/nf-core/atacseq/issues/46) - Stage blacklist file in channel properly +- [nf-core/atacseq#50](https://github.com/nf-core/atacseq/issues/50) - HOMER number of peaks does not correspond to found MACS2 peaks +- Fixed bug in UpSetR peak intersection plot +- Increase default resource requirements in `base.config` +- Increase process-specific requirements based on user-reported failures ### `Dependencies` -* Update Nextflow `0.32.0` -> `19.10.0` +- Update Nextflow `0.32.0` -> `19.10.0` ### `Deprecated` | Deprecated | Replacement | -|------------------------------|---------------------------| +| ---------------------------- | ------------------------- | | `--design` | `--input` | | `--singleEnd` | `--single_end` | | `--saveGenomeIndex` | `--save_reference` | @@ -143,20 +227,20 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. | `--skipIGV` | `--skip_igv` | | `--skipMultiQC` | `--skip_multiqc` | -## [1.0.0] - 2019-06-06 +## [[1.0.0](https://github.com/nf-core/chipseq/releases/tag/1.0.0)] - 2019-06-06 Initial release of nf-core/chipseq pipeline. ### `Added` -* Raw read QC (FastQC) -* Adapter trimming (Trim Galore!) 
-* Map and filter reads (BWA, picard, SAMtools, BEDTools, BAMTools, Pysam) -* Create library-size normalised bigWig tracks (BEDTools, bedGraphToBigWig) -* Alignment QC metrics (Preseq, picard) -* ChIP-seq QC metrics (deepTools, phantompeakqualtools) -* Call and annotate broad/narrow peaks (MACS2, HOMER) -* Create consensus set of peaks per antibody (BEDTools) -* Quantification and differential binding analysis (featureCounts, DESeq2) -* Collate appropriate files for genome browser visualisation (IGV) -* Collate and present various QC metrics (MultiQC, R) +- Raw read QC (FastQC) +- Adapter trimming (Trim Galore!) +- Map and filter reads (BWA, picard, SAMtools, BEDTools, BAMTools, Pysam) +- Create library-size normalised bigWig tracks (BEDTools, bedGraphToBigWig) +- Alignment QC metrics (Preseq, picard) +- ChIP-seq QC metrics (deepTools, phantompeakqualtools) +- Call and annotate broad/narrow peaks (MACS2, HOMER) +- Create consensus set of peaks per antibody (BEDTools) +- Quantification and differential binding analysis (featureCounts, DESeq2) +- Collate appropriate files for genome browser visualisation (IGV) +- Collate and present various QC metrics (MultiQC, R) diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 000000000..4533e2f28 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,56 @@ +cff-version: 1.2.0 +message: "If you use `nf-core tools` in your work, please cite the `nf-core` publication" +authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven +title: "The nf-core framework for community-curated bioinformatics pipelines." 
+version: 2.4.1 +doi: 10.1038/s41587-020-0439-x +date-released: 2022-05-16 +url: https://github.com/nf-core/tools +preferred-citation: + type: article + authors: + - family-names: Ewels + given-names: Philip + - family-names: Peltzer + given-names: Alexander + - family-names: Fillinger + given-names: Sven + - family-names: Patel + given-names: Harshil + - family-names: Alneberg + given-names: Johannes + - family-names: Wilm + given-names: Andreas + - family-names: Ulysse Garcia + given-names: Maxime + - family-names: Di Tommaso + given-names: Paolo + - family-names: Nahnsen + given-names: Sven + doi: 10.1038/s41587-020-0439-x + journal: nature biotechnology + start: 276 + end: 278 + title: "The nf-core framework for community-curated bioinformatics pipelines." + issue: 3 + volume: 38 + year: 2020 + url: https://dx.doi.org/10.1038/s41587-020-0439-x diff --git a/CITATIONS.md b/CITATIONS.md index 987f08bbc..63188ca80 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -1,106 +1,135 @@ -# nf-core/chipseq: Citations - -## [nf-core](https://www.ncbi.nlm.nih.gov/pubmed/32055031/) - -> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. ReadCube: [Full Access Link](https://rdcu.be/b1GjZ). - -## [Nextflow](https://www.ncbi.nlm.nih.gov/pubmed/28398311/) - -> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. - -## Pipeline tools - -* [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) - > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18.
PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. - -* [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) - > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. - -* [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) - > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. - -* [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) - > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. - -* [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) - > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. - -* [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) - > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. - -* [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) - > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. 
Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. - -* [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) - > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. - -* [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) - > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. - -* [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) - > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. - -* [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) - > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. 
Epub 2013 Nov 13. PubMed PMID: 24227677. - -* [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) - > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. - -* [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) - -* [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) - -* [picard-tools](http://broadinstitute.github.io/picard) - -* [pysam](https://github.com/pysam-developers/pysam) - -## R packages - -* [R](https://www.R-project.org/) - > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. - -* [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) - > Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. - -* [vsn](https://bioconductor.org/packages/release/bioc/html/vsn.html) - > Wolfgang Huber, Anja von Heydebreck, Holger Sueltmann, Annemarie Poustka and Martin Vingron. Variance Stabilization Applied to Microarray Data Calibration and to the Quantification of Differential Expression. Bioinformatics 18, S96-S104 (2002). - -* [UpSetR](https://CRAN.R-project.org/package=UpSetR) - > Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. - -* [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) - > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. - -* [reshape2](http://www.jstatsoft.org/v21/i12/) - > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. 
- -* [scales](https://CRAN.R-project.org/package=scales) - > Hadley Wickham (2018). scales: Scale Functions for Visualization. - -* [pheatmap](https://CRAN.R-project.org/package=pheatmap) - > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. - -* [lattice](https://cran.r-project.org/web/packages/lattice/index.html) - > Sarkar, Deepayan (2008) Lattice: Multivariate Data Visualization with R. Springer, New York. ISBN 978-0-387-75968-5. - -* [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) - > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. - -* [optparse](https://CRAN.R-project.org/package=optparse) - > Trevor L Davis (2018). optparse: Command Line Option Parser. - -* [xfun](https://CRAN.R-project.org/package=xfun) - > Yihui Xie (2018). xfun: Miscellaneous Functions by 'Yihui Xie'. - -## Software packaging/containerisation tools - -* [Bioconda](https://www.ncbi.nlm.nih.gov/pubmed/29967506/) - > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. - -* [Anaconda](https://anaconda.com) - > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. - -* [Singularity](https://www.ncbi.nlm.nih.gov/pubmed/28494014/) - > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. - -* [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) +# nf-core/chipseq: Citations + +## [nf-core](https://pubmed.ncbi.nlm.nih.gov/32055031/) + +> Ewels PA, Peltzer A, Fillinger S, Patel H, Alneberg J, Wilm A, Garcia MU, Di Tommaso P, Nahnsen S. The nf-core framework for community-curated bioinformatics pipelines. Nat Biotechnol. 
2020 Mar;38(3):276-278. doi: 10.1038/s41587-020-0439-x. PubMed PMID: 32055031. + +## [Nextflow](https://pubmed.ncbi.nlm.nih.gov/28398311/) + +> Di Tommaso P, Chatzou M, Floden EW, Barja PP, Palumbo E, Notredame C. Nextflow enables reproducible computational workflows. Nat Biotechnol. 2017 Apr 11;35(4):316-319. doi: 10.1038/nbt.3820. PubMed PMID: 28398311. + +## Pipeline tools + +- [BWA](https://www.ncbi.nlm.nih.gov/pubmed/19451168/) + + > Li H, Durbin R. Fast and accurate short read alignment with Burrows-Wheeler transform. Bioinformatics. 2009 Jul 15;25(14):1754-60. doi: 10.1093/bioinformatics/btp324. Epub 2009 May 18. PubMed PMID: 19451168; PubMed Central PMCID: PMC2705234. + +- [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +- [BamTools](https://www.ncbi.nlm.nih.gov/pubmed/21493652/) + + > Barnett DW, Garrison EK, Quinlan AR, Strömberg MP, Marth GT. BamTools: a C++ API and toolkit for analyzing and managing BAM files. Bioinformatics. 2011 Jun 15;27(12):1691-2. doi: 10.1093/bioinformatics/btr174. Epub 2011 Apr 14. PubMed PMID: 21493652; PubMed Central PMCID: PMC3106182. + +- [Bowtie2](https://dx.doi.org/10.1038/nmeth.1923) + + > Langmead, B. and Salzberg, S. L. 2012 Fast gapped-read alignment with Bowtie 2. Nature methods, 9(4), p. 357–359. doi: 10.1038/nmeth.1923. + +- [Chromap](https://doi.org/10.1038/s41467-021-26865-w) + + > Zhang H, Song L, Wang X, Cheng H, Wang C, Meyer CA, Liu T, Tang M, Aluru S, Yue F, Liu XS and Li H. Fast alignment and preprocessing of chromatin profiles with Chromap. Nature communications. 2021, 12(1), 1-6.
doi: 10.1038/s41467-021-26865-w + +- [deepTools](https://www.ncbi.nlm.nih.gov/pubmed/27079975/) + + > Ramírez F, Ryan DP, Grüning B, Bhardwaj V, Kilpert F, Richter AS, Heyne S, Dündar F, Manke T. deepTools2: a next generation web server for deep-sequencing data analysis. Nucleic Acids Res. 2016 Jul 8;44(W1):W160-5. doi: 10.1093/nar/gkw257. Epub 2016 Apr 13. PubMed PMID: 27079975; PubMed Central PMCID: PMC4987876. + +- [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + +- [featureCounts](https://www.ncbi.nlm.nih.gov/pubmed/24227677/) + + > Liao Y, Smyth GK, Shi W. featureCounts: an efficient general purpose program for assigning sequence reads to genomic features. Bioinformatics. 2014 Apr 1;30(7):923-30. doi: 10.1093/bioinformatics/btt656. Epub 2013 Nov 13. PubMed PMID: 24227677. + +- [HOMER](https://www.ncbi.nlm.nih.gov/pubmed/20513432/) + + > Heinz S, Benner C, Spann N, Bertolino E, Lin YC, Laslo P, Cheng JX, Murre C, Singh H, Glass CK. Simple combinations of lineage-determining transcription factors prime cis-regulatory elements required for macrophage and B cell identities. Mol Cell. 2010 May 28;38(4):576-89. doi: 10.1016/j.molcel.2010.05.004. PubMed PMID: 20513432; PubMed Central PMCID: PMC2898526. + +- [MACS2](https://www.ncbi.nlm.nih.gov/pubmed/18798982/) + + > Zhang Y, Liu T, Meyer CA, Eeckhoute J, Johnson DS, Bernstein BE, Nusbaum C, Myers RM, Brown M, Li W, Liu XS. Model-based analysis of ChIP-Seq (MACS). Genome Biol. 2008;9(9):R137. doi: 10.1186/gb-2008-9-9-r137. Epub 2008 Sep 17. PubMed PMID: 18798982; PubMed Central PMCID: PMC2592715. + +- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. 
+ +- [phantompeakqualtools](https://www.ncbi.nlm.nih.gov/pubmed/22955991/) + + > Landt SG, Marinov GK, Kundaje A, Kheradpour P, Pauli F, Batzoglou S, Bernstein BE, Bickel P, Brown JB, Cayting P, Chen Y, DeSalvo G, Epstein C, Fisher-Aylor KI, Euskirchen G, Gerstein M, Gertz J, Hartemink AJ, Hoffman MM, Iyer VR, Jung YL, Karmakar S, Kellis M, Kharchenko PV, Li Q, Liu T, Liu XS, Ma L, Milosavljevic A, Myers RM, Park PJ, Pazin MJ, Perry MD, Raha D, Reddy TE, Rozowsky J, Shoresh N, Sidow A, Slattery M, Stamatoyannopoulos JA, Tolstorukov MY, White KP, Xi S, Farnham PJ, Lieb JD, Wold BJ, Snyder M. ChIP-seq guidelines and practices of the ENCODE and modENCODE consortia. Genome Res. 2012 Sep;22(9):1813-31. doi: 10.1101/gr.136184.111. PubMed PMID: 22955991; PubMed Central PMCID: PMC3431496. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [preseq](https://www.ncbi.nlm.nih.gov/pubmed/23435259/) + + > Daley T, Smith AD. Predicting the molecular complexity of sequencing libraries. Nat Methods. 2013 Apr;10(4):325-7. doi: 10.1038/nmeth.2375. Epub 2013 Feb 24. PubMed PMID: 23435259; PubMed Central PMCID: PMC3612374. + +- [pysam](https://github.com/pysam-developers/pysam) + +- [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [STAR](https://pubmed.ncbi.nlm.nih.gov/23104886/) + + > Dobin A, Davis CA, Schlesinger F, Drenkow J, Zaleski C, Jha S, Batut P, Chaisson M, Gingeras TR. STAR: ultrafast universal RNA-seq aligner. Bioinformatics. 2013 Jan 1;29(1):15-21. doi: 10.1093/bioinformatics/bts635. Epub 2012 Oct 25. PubMed PMID: 23104886; PubMed Central PMCID: PMC3530905.
+ +- [Trim Galore!](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/) + +- [UCSC tools](https://www.ncbi.nlm.nih.gov/pubmed/20639541/) + > Kent WJ, Zweig AS, Barber G, Hinrichs AS, Karolchik D. BigWig and BigBed: enabling browsing of large distributed datasets. Bioinformatics. 2010 Sep 1;26(17):2204-7. doi: 10.1093/bioinformatics/btq351. Epub 2010 Jul 17. PubMed PMID: 20639541; PubMed Central PMCID: PMC2922891. + +## R packages + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. + +- [DESeq2](https://www.ncbi.nlm.nih.gov/pubmed/25516281/) + + > Love MI, Huber W, Anders S. Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2. Genome Biol. 2014;15(12):550. PubMed PMID: 25516281; PubMed Central PMCID: PMC4302049. + +- [UpSetR](https://CRAN.R-project.org/package=UpSetR) + + > Nils Gehlenborg (2017). UpSetR: A More Scalable Alternative to Venn and Euler Diagrams for Visualizing Intersecting Sets. + +- [ggplot2](https://cran.r-project.org/web/packages/ggplot2/index.html) + + > H. Wickham. ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York, 2016. + +- [reshape2](http://www.jstatsoft.org/v21/i12/) + + > Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of Statistical Software, 21(12), 1-20. + +- [scales](https://CRAN.R-project.org/package=scales) + + > Hadley Wickham (2018). scales: Scale Functions for Visualization. + +- [pheatmap](https://CRAN.R-project.org/package=pheatmap) + + > Raivo Kolde (2018). pheatmap: Pretty Heatmaps. + +- [RColorBrewer](https://CRAN.R-project.org/package=RColorBrewer) + + > Erich Neuwirth (2014). RColorBrewer: ColorBrewer Palettes. + +- [optparse](https://CRAN.R-project.org/package=optparse) + + > Trevor L Davis (2018). optparse: Command Line Option Parser. 
+ +## Software packaging/containerisation tools + +- [Anaconda](https://anaconda.com) + + > Anaconda Software Distribution. Computer software. Vers. 2-2.4.0. Anaconda, Nov. 2016. Web. + +- [Bioconda](https://pubmed.ncbi.nlm.nih.gov/29967506/) + + > Grüning B, Dale R, Sjödin A, Chapman BA, Rowe J, Tomkins-Tinch CH, Valieris R, Köster J; Bioconda Team. Bioconda: sustainable and comprehensive software distribution for the life sciences. Nat Methods. 2018 Jul;15(7):475-476. doi: 10.1038/s41592-018-0046-7. PubMed PMID: 29967506. + +- [BioContainers](https://pubmed.ncbi.nlm.nih.gov/28379341/) + + > da Veiga Leprevost F, Grüning B, Aflitos SA, Röst HL, Uszkoreit J, Barsnes H, Vaudel M, Moreno P, Gatto L, Weber J, Bai M, Jimenez RC, Sachsenberg T, Pfeuffer J, Alvarez RV, Griss J, Nesvizhskii AI, Perez-Riverol Y. BioContainers: an open-source and community-driven framework for software standardization. Bioinformatics. 2017 Aug 15;33(16):2580-2582. doi: 10.1093/bioinformatics/btx192. PubMed PMID: 28379341; PubMed Central PMCID: PMC5870671. + +- [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + +- [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index cf930c8ac..f4fd052f1 100755 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -1,46 +1,111 @@ -# Contributor Covenant Code of Conduct +# Code of Conduct at nf-core (v1.0) ## Our Pledge -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. +In the interest of fostering an open, collaborative, and welcoming environment, we as contributors and maintainers of nf-core, pledge to making participation in our projects and community a harassment-free experience for everyone, regardless of: -## Our Standards +- Age +- Body size +- Familial status +- Gender identity and expression +- Geographical location +- Level of experience +- Nationality and national origins +- Native language +- Physical and neurological ability +- Race or ethnicity +- Religion +- Sexual identity and orientation +- Socioeconomic status -Examples of behavior that contributes to creating a positive environment include: +Please note that the list above is alphabetised and is therefore not ranked in any order of preference or importance. -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members +## Preamble -Examples of unacceptable behavior by participants include: +> Note: This Code of Conduct (CoC) has been drafted by the nf-core Safety Officer and been edited after input from members of the nf-core team and others.
"We", in this document, refers to the Safety Officer and members of the nf-core core team, both of whom are deemed to be members of the nf-core community and are therefore required to abide by this Code of Conduct. This document will be amended periodically to keep it up-to-date, and in case of any dispute, the most current version will apply. -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting +An up-to-date list of members of the nf-core core team can be found [here](https://nf-co.re/about). Our current safety officer is Renuka Kudva. + +nf-core is a young and growing community that welcomes contributions from anyone with a shared vision for [Open Science Policies](https://www.fosteropenscience.eu/taxonomy/term/8). Open science policies encompass inclusive behaviours and we strive to build and maintain a safe and inclusive environment for all individuals. + +We have therefore adopted this code of conduct (CoC), which we require all members of our community and attendees in nf-core events to adhere to in all our workspaces at all times. Workspaces include but are not limited to Slack, meetings on Zoom, Jitsi, YouTube live etc. + +Our CoC will be strictly enforced and the nf-core team reserve the right to exclude participants who do not comply with our guidelines from our workspaces and future nf-core activities. + +We ask all members of our community to help maintain a supportive and productive workspace and to avoid behaviours that can make individuals feel unsafe or unwelcome. Please help us maintain and uphold this CoC. + +Questions, concerns or ideas on what we can include?
Contact safety [at] nf-co [dot] re ## Our Responsibilities -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. +The safety officer is responsible for clarifying the standards of acceptable behavior and is expected to take appropriate and fair corrective action in response to any instances of unacceptable behaviour. + +The safety officer in consultation with the nf-core core team have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +Members of the core team or the safety officer who violate the CoC will be required to recuse themselves pending investigation. They will not have access to any reports of the violations and will be subject to the same actions as others in violation of the CoC. + +## When and where does this Code of Conduct apply? + +Participation in the nf-core community is contingent on following these guidelines in all our workspaces and events. This includes but is not limited to the following listed alphabetically and therefore in no order of preference: + +- Communicating with an official project email address. +- Communicating with community members within the nf-core Slack channel. +- Participating in hackathons organised by nf-core (both online and in-person events). +- Participating in collaborative work on GitHub, Google Suite, community calls, mentorship meetings, email correspondence. +- Participating in workshops, training, and seminar series organised by nf-core (both online and in-person events). This applies to events hosted on web-based platforms such as Zoom, Jitsi, YouTube live etc. +- Representing nf-core on social media.
This includes both official and personal accounts. + +## nf-core cares 😊 + +nf-core's CoC and expectations of respectful behaviours for all participants (including organisers and the nf-core team) include but are not limited to the following (listed in alphabetical order): + +- Ask for consent before sharing another community member’s personal information (including photographs) on social media. +- Be respectful of differing viewpoints and experiences. We are all here to learn from one another and a difference in opinion can present a good learning opportunity. +- Celebrate your accomplishments at events! (Get creative with your use of emojis 🎉 🥳 💯 🙌 !) +- Demonstrate empathy towards other community members. (We don’t all have the same amount of time to dedicate to nf-core. If tasks are pending, don’t hesitate to gently remind members of your team. If you are leading a task, ask for help if you feel overwhelmed.) +- Engage with and enquire after others. (This is especially important given the geographically remote nature of the nf-core community, so let’s do this the best we can) +- Focus on what is best for the team and the community. (When in doubt, ask) +- Graciously accept constructive criticism, yet be unafraid to question, deliberate, and learn. +- Introduce yourself to members of the community. (We’ve all been outsiders and we know that talking to strangers can be hard for some, but remember we’re interested in getting to know you and your visions for open science!) +- Show appreciation and **provide clear feedback**. (This is especially important because we don’t see each other in person and it can be harder to interpret subtleties. Also remember that not everyone understands a certain language to the same extent as you do, so **be clear in your communications to be kind.**) +- Take breaks when you feel like you need them. +- Using welcoming and inclusive language. 
(Participants are encouraged to display their chosen pronouns on Zoom or in communication on Slack.) + +## nf-core frowns on 😕 + +The following behaviours from any participants within the nf-core community (including the organisers) will be considered unacceptable under this code of conduct. Engaging or advocating for any of the following could result in expulsion from nf-core workspaces. + +- Deliberate intimidation, stalking or following and sustained disruption of communication among participants of the community. This includes hijacking shared screens through actions such as using the annotate tool in conferencing software such as Zoom. +- “Doxing” i.e. posting (or threatening to post) another person’s personal identifying information online. +- Spamming or trolling of individuals on social media. +- Use of sexual or discriminatory imagery, comments, or jokes and unwelcome sexual attention. +- Verbal and text comments that reinforce social structures of domination related to gender, gender identity and expression, sexual orientation, ability, physical appearance, body size, race, age, religion or work experience. + +### Online Trolling + +The majority of nf-core interactions and events are held online. Unfortunately, holding events online comes with the added issue of online trolling. This is unacceptable, reports of such behaviour will be taken very seriously, and perpetrators will be excluded from activities immediately. + +All community members are required to ask members of the group they are working within for explicit consent prior to taking screenshots of individuals during video calls. 
+ +## Procedures for Reporting CoC violations -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. +If someone makes you feel uncomfortable through their behaviours or actions, report it as soon as possible. -## Scope +You can reach out to members of the [nf-core core team](https://nf-co.re/about) and they will forward your concerns to the safety officer(s). -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. +Issues directly concerning members of the core team will be dealt with by other members of the core team and the safety officer, and possible conflicts of interest will be taken into account. nf-core is also in discussions about having an ombudsperson, and details will be shared in due course. -## Enforcement +All reports will be handled with utmost discretion and confidentiality. -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team on [Slack](https://nf-co.re/join/slack). The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
+## Attribution and Acknowledgements -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. +- The [Contributor Covenant, version 1.4](http://contributor-covenant.org/version/1/4) +- The [OpenCon 2017 Code of Conduct](http://www.opencon2017.org/code_of_conduct) (CC BY 4.0 OpenCon organisers, SPARC and Right to Research Coalition) +- The [eLife innovation sprint 2020 Code of Conduct](https://sprint.elifesciences.org/code-of-conduct/) +- The [Mozilla Community Participation Guidelines v3.1](https://www.mozilla.org/en-US/about/governance/policies/participation/) (version 3.1, CC BY-SA 3.0 Mozilla) -## Attribution +## Changelog -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +### v1.0 - March 12th, 2021 -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +- Complete rewrite from original [Contributor Covenant](http://contributor-covenant.org/) CoC. 
diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 13ec872b4..000000000 --- a/Dockerfile +++ /dev/null @@ -1,17 +0,0 @@ -FROM nfcore/base:1.9 -LABEL authors="Philip Ewels" \ - description="Docker image containing all software requirements for the nf-core/chipseq pipeline" - -# Install the conda environment -COPY environment.yml / -RUN conda env create -f /environment.yml && conda clean -a - -# Add conda installation dir to PATH (instead of doing 'conda activate') -ENV PATH /opt/conda/envs/nf-core-chipseq-1.2.2/bin:$PATH - -# Dump the details of the installed packages to a file for posterity -RUN conda env export --name nf-core-chipseq-1.2.2 > nf-core-chipseq-1.2.2.yml - -# Instruct R processes to use these empty files instead of clashing with a local version -RUN touch .Rprofile -RUN touch .Renviron diff --git a/LICENSE b/LICENSE index 2a0f576bc..9ddb87a26 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) Philip Ewels +Copyright (c) Espinosa-Carrasco J, Patel H, Wang C, Ewels P Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 09d9d5b47..519a93ebd 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,99 @@ -# ![nf-core/chipseq](docs/images/nf-core-chipseq_logo.png) +# ![nf-core/chipseq](docs/images/nf-core-chipseq_logo_light.png#gh-light-mode-only) ![nf-core/chipseq](docs/images/nf-core-chipseq_logo_dark.png#gh-dark-mode-only) -[![GitHub Actions CI Status](https://github.com/nf-core/chipseq/workflows/nf-core%20CI/badge.svg)](https://github.com/nf-core/chipseq/actions) -[![GitHub Actions Linting Status](https://github.com/nf-core/chipseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/chipseq/actions) -[![Nextflow](https://img.shields.io/badge/nextflow-%E2%89%A519.10.0-brightgreen.svg)](https://www.nextflow.io/) 
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3240506.svg)](https://doi.org/10.5281/zenodo.3240506) +[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/chipseq/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3240506-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.3240506) -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg)](http://bioconda.github.io/) -[![Docker](https://img.shields.io/docker/automated/nfcore/chipseq.svg)](https://hub.docker.com/r/nfcore/chipseq/) -[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23chipseq-4A154B?logo=slack)](https://nfcore.slack.com/channels/chipseq) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A521.10.3-23aa62.svg)](https://www.nextflow.io/) +[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) +[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) +[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) +[![Launch on Nextflow Tower](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Nextflow%20Tower-%234256e7)](https://tower.nf/launch?pipeline=https://github.com/nf-core/chipseq) + +[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23chipseq-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/chipseq)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Watch on YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core) ## Introduction **nfcore/chipseq** is a bioinformatics analysis pipeline used for 
Chromatin ImmunopreciPitation sequencing (ChIP-seq) data. -The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It comes with docker containers making installation trivial and results highly reproducible. +On release, automated continuous integration tests run the pipeline on a [full-sized dataset](https://github.com/nf-core/test-datasets/tree/chipseq#full-test-dataset-origin) on the AWS cloud infrastructure. The dataset consists of FoxA1 (transcription factor) and EZH2 (histone mark) IP experiments from _Franco et al. 2015_ ([GEO: GSE59530](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE59530), [PMID: 25752574](https://pubmed.ncbi.nlm.nih.gov/25752574/)) and _Popovic et al. 2014_ ([GEO: GSE57632](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE57632), [PMID: 25188243](https://pubmed.ncbi.nlm.nih.gov/25188243/)), respectively. This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from running the full-sized tests can be viewed on the [nf-core website](https://nf-co.re/chipseq/results). + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies.
Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + +## Online videos + +A short talk about the history, current status and functionality on offer in this pipeline was given by Jose Espinosa-Carrasco ([@joseespinosa](https://github.com/joseespinosa)) on [26th July 2022](https://nf-co.re/events/2022/bytesize-chipseq) as part of the nf-core/bytesize series. + +You can find numerous talks on the [nf-core events page](https://nf-co.re/events) on various topics including writing pipelines/modules in Nextflow DSL2, using nf-core tooling, running nf-core pipelines as well as more generic content like contributing to GitHub. Please check them out! ## Pipeline summary 1. Raw read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) 2. Adapter trimming ([`Trim Galore!`](https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/)) -3. Alignment ([`BWA`](https://sourceforge.net/projects/bio-bwa/files/)) +3. Choice of multiple aligners + 1.([`BWA`](https://sourceforge.net/projects/bio-bwa/files/)) + 2.([`Chromap`](https://github.com/haowenz/chromap)). **For paired-end reads only working until mapping steps, see [here](https://github.com/nf-core/chipseq/issues/291)** + 3.([`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 4.([`STAR`](https://github.com/alexdobin/STAR)) 4. Mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) 5. Merge alignments from multiple libraries of the same sample ([`picard`](https://broadinstitute.github.io/picard/)) - 1. Re-mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) - 2.
Filtering to remove: - * reads mapping to blacklisted regions ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/), [`BEDTools`](https://github.com/arq5x/bedtools2/)) - * reads that are marked as duplicates ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - * reads that arent marked as primary alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - * reads that are unmapped ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - * reads that map to multiple locations ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) - * reads containing > 4 mismatches ([`BAMTools`](https://github.com/pezmaster31/bamtools)) - * reads that have an insert size > 2kb ([`BAMTools`](https://github.com/pezmaster31/bamtools); *paired-end only*) - * reads that map to different chromosomes ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); *paired-end only*) - * reads that arent in FR orientation ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); *paired-end only*) - * reads where only one read of the pair fails the above criteria ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); *paired-end only*) - 3. Alignment-level QC and estimation of library complexity ([`picard`](https://broadinstitute.github.io/picard/), [`Preseq`](http://smithlabresearch.org/software/preseq/)) - 4. Create normalised bigWig files scaled to 1 million mapped reads ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/)) - 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) - 6. Calculate genome-wide IP enrichment relative to control ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) - 7. 
Calculate strand cross-correlation peak and ChIP-seq quality measures including NSC and RSC ([`phantompeakqualtools`](https://github.com/kundajelab/phantompeakqualtools)) - 8. Call broad/narrow peaks ([`MACS2`](https://github.com/taoliu/MACS)) - 9. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html)) - 10. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/)) - 11. Count reads in consensus peaks ([`featureCounts`](http://bioinf.wehi.edu.au/featureCounts/)) - 12. Differential binding analysis, PCA and clustering ([`R`](https://www.r-project.org/), [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)) + 1. Re-mark duplicates ([`picard`](https://broadinstitute.github.io/picard/)) + 2. Filtering to remove: + - reads mapping to blacklisted regions ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/), [`BEDTools`](https://github.com/arq5x/bedtools2/)) + - reads that are marked as duplicates ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that are not marked as primary alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that are unmapped ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads that map to multiple locations ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + - reads containing > 4 mismatches ([`BAMTools`](https://github.com/pezmaster31/bamtools)) + - reads that have an insert size > 2kb ([`BAMTools`](https://github.com/pezmaster31/bamtools); _paired-end only_) + - reads that map to different chromosomes ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + - reads that are not in FR orientation ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + - reads where only one read
of the pair fails the above criteria ([`Pysam`](http://pysam.readthedocs.io/en/latest/installation.html); _paired-end only_) + 3. Alignment-level QC and estimation of library complexity ([`picard`](https://broadinstitute.github.io/picard/), [`Preseq`](http://smithlabresearch.org/software/preseq/)) + 4. Create normalised bigWig files scaled to 1 million mapped reads ([`BEDTools`](https://github.com/arq5x/bedtools2/), [`bedGraphToBigWig`](http://hgdownload.soe.ucsc.edu/admin/exe/)) + 5. Generate gene-body meta-profile from bigWig files ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotProfile.html)) + 6. Calculate genome-wide IP enrichment relative to control ([`deepTools`](https://deeptools.readthedocs.io/en/develop/content/tools/plotFingerprint.html)) + 7. Calculate strand cross-correlation peak and ChIP-seq quality measures including NSC and RSC ([`phantompeakqualtools`](https://github.com/kundajelab/phantompeakqualtools)) + 8. Call broad/narrow peaks ([`MACS2`](https://github.com/macs3-project/MACS)) + 9. Annotate peaks relative to gene features ([`HOMER`](http://homer.ucsd.edu/homer/download.html)) + 10. Create consensus peakset across all samples and create tabular file to aid in the filtering of the data ([`BEDTools`](https://github.com/arq5x/bedtools2/)) + 11. Count reads in consensus peaks ([`featureCounts`](http://bioinf.wehi.edu.au/featureCounts/)) + 12. PCA and clustering ([`R`](https://www.r-project.org/), [`DESeq2`](https://bioconductor.org/packages/release/bioc/html/DESeq2.html)) 6. Create IGV session file containing bigWig tracks, peaks and differential sites for data visualisation ([`IGV`](https://software.broadinstitute.org/software/igv/)). 7. Present QC for raw read, alignment, peak-calling and differential binding results ([`MultiQC`](http://multiqc.info/), [`R`](https://www.r-project.org/)) ## Quick Start -1. Install [`nextflow`](https://nf-co.re/usage/installation) +1. 
Install [`Nextflow`](https://www.nextflow.io/docs/latest/getstarted.html#installation) (`>=21.10.3`) -2. Install either [`Docker`](https://docs.docker.com/engine/installation/) or [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) for full pipeline reproducibility _(please only use [`Conda`](https://conda.io/miniconda.html) as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_ +2. Install any of [`Docker`](https://docs.docker.com/engine/installation/), [`Singularity`](https://www.sylabs.io/guides/3.0/user-guide/) (you can follow [this tutorial](https://singularity-tutorial.github.io/01-installation/)), [`Podman`](https://podman.io/), [`Shifter`](https://nersc.gitlab.io/development/shifter/how-to-use/) or [`Charliecloud`](https://hpc.github.io/charliecloud/) for full pipeline reproducibility _(you can use [`Conda`](https://conda.io/miniconda.html) both to install Nextflow itself and also to manage software within pipelines. Please only use it within pipelines as a last resort; see [docs](https://nf-co.re/usage/configuration#basic-configuration-profiles))_. 3. Download the pipeline and test it on a minimal dataset with a single command: - ```bash - nextflow run nf-core/chipseq -profile test,<docker/singularity/conda/institute> - ``` + ```bash + nextflow run nf-core/chipseq -profile test,YOURPROFILE --outdir <OUTDIR> + ``` + + Note that some form of configuration will be needed so that Nextflow knows how to fetch the required software. This is usually done in the form of a config profile (`YOURPROFILE` in the example command above). You can chain multiple config profiles in a comma-separated string. - > Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. 
+ > - The pipeline comes with config profiles called `docker`, `singularity`, `podman`, `shifter`, `charliecloud` and `conda` which instruct the pipeline to use the named tool for software management. For example, `-profile test,docker`. + > - Please check [nf-core/configs](https://github.com/nf-core/configs#documentation) to see if a custom config file to run nf-core pipelines already exists for your Institute. If so, you can simply use `-profile <institute>` in your command. This will enable either `docker` or `singularity` and set the appropriate execution settings for your local compute environment. + > - If you are using `singularity`, please use the [`nf-core download`](https://nf-co.re/tools/#downloading-pipelines-for-offline-use) command to download images first, before running the pipeline. Setting the [`NXF_SINGULARITY_CACHEDIR` or `singularity.cacheDir`](https://www.nextflow.io/docs/latest/singularity.html?#singularity-docker-hub) Nextflow options enables you to store and re-use the images from a central location for future pipeline runs. + > - If you are using `conda`, it is highly recommended to use the [`NXF_CONDA_CACHEDIR` or `conda.cacheDir`](https://www.nextflow.io/docs/latest/conda.html) settings to store the environments in a central location for future pipeline runs. 4. Start running your own analysis! - ```bash - nextflow run nf-core/chipseq -profile <docker/singularity/conda/institute> --input design.csv --genome GRCh37 - ``` + ```bash + nextflow run nf-core/chipseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> + ``` -See [usage docs](docs/usage.md) for all of the available options when running the pipeline. +See [usage docs](https://nf-co.re/chipseq/usage) for all of the available options when running the pipeline. ## Documentation -The nf-core/chipseq pipeline comes with documentation about the pipeline, found in the `docs/` directory: - -1. [Installation](https://nf-co.re/usage/installation) -2. 
Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](docs/usage.md) -4. [Output and how to interpret the results](docs/output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +The nf-core/chipseq pipeline comes with documentation about the pipeline: [usage](https://nf-co.re/chipseq/usage) and [output](https://nf-co.re/chipseq/output). ## Credits -These scripts were originally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. The pipeline has since been re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. +These scripts were originally written by Chuan Wang ([@chuan-wang](https://github.com/chuan-wang)) and Phil Ewels ([@ewels](https://github.com/ewels)) for use at the [National Genomics Infrastructure](https://portal.scilifelab.se/genomics/) at [SciLifeLab](http://www.scilifelab.se/) in Stockholm, Sweden. The pipeline was re-implemented by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/) and converted to Nextflow DSL2 by Jose Espinosa-Carrasco ([@JoseEspinosa](https://github.com/JoseEspinosa)) from [The Comparative Bioinformatics Group](https://www.crg.eu/en/cedric_notredame) at [The Centre for Genomic Regulation, Spain](https://www.crg.eu/). 
Many thanks to others who have helped out and contributed along the way too, including (but not limited to): [@apeltzer](https://github.com/apeltzer), [@bc2zb](https://github.com/bc2zb), [@crickbabs](https://github.com/crickbabs), [@drejom](https://github.com/drejom), [@houghtos](https://github.com/houghtos), [@KevinMenden](https://github.com/KevinMenden), [@mashehu](https://github.com/mashehu), [@pditommaso](https://github.com/pditommaso), [@Rotholandus](https://github.com/Rotholandus), [@sofiahaglund](https://github.com/sofiahaglund), [@tiagochst](https://github.com/tiagochst) and [@winni2k](https://github.com/winni2k). @@ -94,17 +103,16 @@ If you would like to contribute to this pipeline, please see the [contributing g For further information or help, don't hesitate to get in touch on the [Slack `#chipseq` channel](https://nfcore.slack.com/channels/chipseq) (you can join with [this invite](https://nf-co.re/join/slack)). -## Citation +## Citations If you use nf-core/chipseq for your analysis, please cite it using the following doi: [10.5281/zenodo.3240506](https://doi.org/10.5281/zenodo.3240506) -You can cite the `nf-core` publication as follows: - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. +You can cite the `nf-core` publication as follows: + > **The nf-core framework for community-curated bioinformatics pipelines.** > > Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen. > -> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). -> ReadCube: [Full Access Link](https://rdcu.be/b1GjZ) +> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x). 
diff --git a/assets/bamtools_filter_pe.json b/assets/bamtools_filter_pe.json index 323c186c0..618b7e9c3 100755 --- a/assets/bamtools_filter_pe.json +++ b/assets/bamtools_filter_pe.json @@ -1,18 +1,11 @@ { - "filters" : [ - { "id" : "insert_min", - "insertSize" : ">=-2000" - }, + "filters": [ + { "id": "insert_min", "insertSize": ">=-2000" }, - { "id" : "insert_max", - "insertSize" : "<=2000" - }, + { "id": "insert_max", "insertSize": "<=2000" }, - { "id" : "mismatch", - "tag" : "NM:<=4" - } + { "id": "mismatch", "tag": "NM:<=4" } ], - "rule" : " insert_min & insert_max & mismatch " - + "rule": " insert_min & insert_max & mismatch " } diff --git a/assets/bamtools_filter_se.json b/assets/bamtools_filter_se.json index 0b21d3e97..8928ab1cd 100755 --- a/assets/bamtools_filter_se.json +++ b/assets/bamtools_filter_se.json @@ -1,10 +1,5 @@ { - "filters" : [ - { "id" : "mismatch", - "tag" : "NM:<=4" - } - ], - - "rule" : " mismatch " + "filters": [{ "id": "mismatch", "tag": "NM:<=4" }], + "rule": " mismatch " } diff --git a/assets/blacklists/GRCh37-blacklist.bed b/assets/blacklists/GRCh37-blacklist.bed deleted file mode 100755 index dab19f361..000000000 --- a/assets/blacklists/GRCh37-blacklist.bed +++ /dev/null @@ -1,411 +0,0 @@ -1 564449 570371 High_Mappability_island 1000 . -1 724136 727043 Satellite_repeat 1000 . -1 825006 825115 BSR/Beta 1000 . -1 2583334 2634374 Low_mappability_island 1000 . -1 4363064 4363242 (CATTC)n 1000 . -1 5725866 5736651 Low_mappability_island 1000 . -1 16839923 16841396 Low_mappability_island 1000 . -1 38077347 38077423 Low_mappability_island 1000 . -1 91852785 91853147 LSU-rRNA_Hsa 1000 . -1 104163724 104163860 Low_mappability_island 1000 . -1 108112972 108113707 LSU-rRNA_Hsa 1000 . -1 121351474 121487059 centromeric_repeat 1000 . -1 142535434 142543081 Satellite_repeat 1000 . -1 142723256 142723968 Low_mappability_island 1000 . -1 142792613 142793303 Low_mappability_island 1000 . -1 142835822 142837333 Low_mappability_island 1000 . 
-1 143274490 143284340 centromeric_repeat 1000 . -1 145277108 145277572 LSU-rRNA_Hsa 1000 . -1 149033183 149035829 Satellite_repeat 1000 . -1 156186169 156186712 High_Mappability_island 1000 . -1 224199390 224204260 Satellite_repeat 1000 . -1 233318467 233318516 (CATTC)n 1000 . -1 236260366 236260821 Low_mappability_island 1000 . -1 237766308 237766764 LSU-rRNA_Hsa 1000 . -1 238105345 238105511 Low_mappability_island 1000 . -1 238108025 238108378 Low_mappability_island 1000 . -1 238108645 238109697 Low_mappability_island 1000 . -10 18841533 18862467 (CATTC)n 1000 . -10 20035661 20037171 Low_mappability_island 1000 . -10 36722282 36723650 Low_mappability_island 1000 . -10 38772277 38819357 Satellite_repeat 1000 . -10 38868892 38889025 Satellite_repeat 1000 . -10 39076515 39155771 Satellite_repeat 1000 . -10 42354835 42548642 centromeric_repeat 1000 . -10 42596676 42602082 Satellite_repeat 1000 . -10 42596700 42602110 Satellite_repeat 1000 . -10 42661264 42667623 Satellite_repeat 1000 . -10 42790522 42818398 Satellite_repeat 1000 . -10 135498649 135502716 Satellite_repeat 1000 . -11 6831669 6831838 ALR/Alpha 1000 . -11 10529403 10531969 Low_mappability_island 1000 . -11 48671444 48902406 centromeric_repeat 1000 . -11 48931242 48964015 centromeric_repeat 1000 . -11 50318471 50784078 centromeric_repeat 1000 . -11 51090700 51374066 centromeric_repeat 1000 . -11 51567242 51594226 centromeric_repeat 1000 . -11 54694046 55027975 centromeric_repeat 1000 . -11 73221660 73221946 Low_mappability_island 1000 . -11 85194913 85195322 LSU-rRNA_Hsa 1000 . -11 87524468 87525005 Low_mappability_island 1000 . -11 103275584 103281729 Low_mappability_island 1000 . -11 122874287 122874443 Low_mappability_island 1000 . -12 20704285 20704583 SSU-rRNA_Hsa 1000 . -12 34372315 34372825 LSU-rRNA_Hsa 1000 . -12 34432130 34857010 centromeric_repeat 1000 . -12 37989447 38441828 centromeric_repeat 1000 . -12 38531376 38531930 LSU-rRNA_Hsa 1000 . -12 41757383 41757545 Low_mappability_island 1000 . 
-12 127650407 127651075 LSU-rRNA_Hsa 1000 . -12 132061320 132062046 Low_mappability_island 1000 . -13 56545728 56545925 Low_mappability_island 1000 . -13 110076444 110076782 Low_mappability_island 1000 . -14 18999935 19056900 centromeric_repeat 1000 . -14 32953263 32954381 Low_mappability_island 1000 . -14 84637832 84639038 Low_mappability_island 1000 . -14 90341302 90341516 SSU-rRNA_Hsa 1000 . -15 19999941 20044132 centromeric_repeat 1000 . -16 32493036 32570826 ALR/Alpha 1000 . -16 32590063 32598801 ALR/Alpha 1000 . -16 33237130 33241330 Low_mappability_island 1000 . -16 33864355 34023306 centromeric_repeat 1000 . -16 34180542 34197081 Satellite_repeat 1000 . -16 34530115 34542632 BSR/Beta 1000 . -16 35193580 35285885 centromeric_repeat 1000 . -16 46385718 46456668 Satellite_repeat 1000 . -16 46497639 46500515 Satellite_repeat 1000 . -16 47538629 47539297 LSU-rRNA_Hsa 1000 . -17 19355538 19356096 LSU-rRNA_Hsa 1000 . -17 19502495 19506773 Low_mappability_island 1000 . -17 21905167 21906712 centromeric_repeat 1000 . -17 22018524 22032049 Low_mappability_island 1000 . -17 22221073 22263006 centromeric_repeat 1000 . -17 25263010 25268059 Satellite_repeat 1000 . -17 25415551 25417559 telomeric_repeat 1000 . -17 31149365 31149981 High_Mappability_island 1000 . -17 33478114 33478372 LSU-rRNA_Hsa 1000 . -17 41381502 41382591 High_Mappability_island 1000 . -17 41463538 41464075 High_Mappability_island 1000 . -17 41464478 41465015 snRNA 1000 . -17 41465562 41467288 High_Mappability_island 1000 . -17 51183038 51183763 Low_mappability_island 1000 . -17 55868618 55868752 LSU-rRNA_Hsa 1000 . -17 75158031 75158430 LSU-rRNA_Hsa 1000 . -18 96416 97552 Satellite_repeat 1000 . -18 105658 112233 Satellite_repeat 1000 . -18 2842252 2842356 Low_mappability_island 1000 . -18 15393801 15393992 centromeric_repeat 1000 . -18 18510894 18520356 centromeric_repeat 1000 . -18 44126235 44126593 (CATTC)n 1000 . -18 45379603 45379864 Low_mappability_island 1000 . 
-18 50319086 50319301 Low_mappability_island 1000 . -18 77772846 77773065 LSU-rRNA_Hsa 1000 . -19 246006 247844 TAR1 1000 . -19 22877614 22877696 SSU-rRNA_Hsa 1000 . -19 23235030 23235504 BSR/Beta 1000 . -19 24182398 24186210 LSU-rRNA_Hsa 1000 . -19 24385474 24633168 centromeric_repeat 1000 . -19 27730611 28262682 centromeric_repeat 1000 . -19 36066445 36066810 LSU-rRNA_Hsa 1000 . -19 36756398 36800948 centromeric_repeat 1000 . -19 37759473 37797722 centromeric_repeat 1000 . -19 44914313 44916340 ACRO1 1000 . -19 44960681 44962681 ACRO1 1000 . -2 739925 740994 Low_mappability_island 1000 . -2 49456729 49457067 Low_mappability_island 1000 . -2 88124390 88124903 Low_mappability_island 1000 . -2 89830421 89880514 Satellite_repeat 1000 . -2 90371401 90394776 Satellite_repeat 1000 . -2 90443001 90545431 Low_mappability_island 1000 . -2 91595080 91616015 Satellite_repeat 1000 . -2 92267428 92326280 centromeric_repeat 1000 . -2 115695017 115695281 LSU-rRNA_Hsa 1000 . -2 117781085 117781300 Low_mappability_island 1000 . -2 132966248 132989300 centromeric_repeat 1000 . -2 132994855 133007983 ALR/Alpha 1000 . -2 133011824 133013298 SSU-rRNA_Hsa 1000 . -2 133036250 133040042 LSU-rRNA_Hsa 1000 . -2 133044095 133045945 ACRO1 1000 . -2 143848503 143848792 Low_mappability_island 1000 . -2 148022736 148022878 Low_mappability_island 1000 . -2 149639207 149639515 Low_mappability_island 1000 . -2 156120500 156120610 Low_mappability_island 1000 . -2 162135000 162139241 Low_mappability_island 1000 . -2 230045426 230045796 LSU-rRNA_Hsa 1000 . -20 26257032 26320267 centromeric_repeat 1000 . -20 29517710 29521147 centromeric_repeat 1000 . -20 29803876 29833334 centromeric_repeat 1000 . -20 55932703 55936114 chrM 1000 . -20 62916702 62918053 telomeric_repeat 1000 . -21 9647205 9648529 Low_mappability_island 1000 . -21 9694896 9704962 centromeric_repeat 1000 . -21 9825451 9827612 High_Mappability_island 1000 . -21 9827612 9845233 Low_mappability_island 1000 . -21 9881895 9882569 TAR1 1000 . 
-21 10084922 10088004 Satellite_repeat 1000 . -21 10492876 10493049 Low_mappability_island 1000 . -21 10599428 10599915 TAR1 1000 . -21 10697886 10860890 centromeric_repeat 1000 . -21 11186054 11188131 Satellite_repeat 1000 . -21 14338127 14369791 centromeric_repeat 1000 . -21 18800575 18800997 (GAGTG)n 1000 . -21 27228003 27228242 SSU-rRNA_Hsa 1000 . -21 46796081 46796336 Low_mappability_island 1000 . -22 16847814 16862659 Satellite_repeat 1000 . -22 18876789 18884510 Satellite_repeat 1000 . -3 25508897 25509131 Low_mappability_island 1000 . -3 73159606 73161131 snRNA 1000 . -3 75696297 75699304 BSR/Beta 1000 . -3 75717841 75720426 Satellite_repeat 1000 . -3 80995858 81014459 ALR/Alpha 1000 . -3 90311686 90507410 centromeric_repeat 1000 . -3 93504815 93519133 centromeric_repeat 1000 . -3 96335934 96337436 Low_mappability_island 1000 . -3 160665423 160665642 Low_mappability_island 1000 . -3 196625514 196625860 Satellite_repeat 1000 . -3 197825427 197834080 Low_mappability_island 1000 . -4 9987 12694 telomeric_repeat 1000 . -4 12276463 12292424 ALR/Alpha 1000 . -4 12641862 12642305 Low_mappability_island 1000 . -4 21583630 21583719 (GAATG)n 1000 . -4 27732004 27732240 Low_mappability_island 1000 . -4 47774268 47774416 Low_mappability_island 1000 . -4 49085372 49342114 centromeric_repeat 1000 . -4 49488472 49662085 centromeric_repeat 1000 . -4 52659961 52688986 centromeric_repeat 1000 . -4 56194229 56194584 Low_mappability_island 1000 . -4 65473858 65473941 Low_mappability_island 1000 . -4 68264186 68266830 centromeric_repeat 1000 . -4 70296565 70296841 LSU-rRNA_Hsa 1000 . -4 76807083 76807320 LSU-rRNA_Hsa 1000 . -4 78929660 78929920 Low_mappability_island 1000 . -4 156374749 156377226 chrM 1000 . -4 156384860 156387314 Low_mappability_island 1000 . -4 163342479 163342744 Low_mappability_island 1000 . -4 190190746 190203442 Low_mappability_island 1000 . -4 190801869 190802909 Low_mappability_island 1000 . -4 190943802 190943962 Satellite_repeat 1000 . 
-4 190987268 190990949 Satellite_repeat 1000 . -4 191026302 191044344 telomeric_repeat 1000 . -5 17517177 17600940 Low_mappability_island 1000 . -5 21477365 21497415 Low_mappability_island 1000 . -5 34177882 34197574 Low_mappability_island 1000 . -5 45908253 46411114 centromeric_repeat 1000 . -5 49405493 49554574 centromeric_repeat 1000 . -5 71146650 71146996 LSU-rRNA_Hsa 1000 . -5 79945807 79948223 Low_mappability_island 1000 . -5 93903068 93906726 Low_mappability_island 1000 . -5 97746525 97746679 Low_mappability_island 1000 . -5 99381556 99390873 Low_mappability_island 1000 . -5 105889063 105889263 chrM 1000 . -5 123095972 123097432 chrM 1000 . -5 134258949 134264271 Low_mappability_island 1000 . -5 174541634 174542177 SSU-rRNA_Hsa 1000 . -6 58735349 58739031 centromeric_repeat 1000 . -6 58745955 58780547 centromeric_repeat 1000 . -6 61880095 61944008 centromeric_repeat 1000 . -6 62189892 62206612 ALR/Alpha 1000 . -6 62207809 62230644 ALR/Alpha 1000 . -6 62283966 62284581 Low_mappability_island 1000 . -6 133593944 133594201 LSU-rRNA_Hsa 1000 . -6 137059142 137059326 SSU-rRNA_Hsa 1000 . -6 150665074 150665281 SSU-rRNA_Hsa 1000 . -6 157731310 157735525 Low_mappability_island 1000 . -7 43878355 43878530 TAR1 1000 . -7 45291517 45291740 Low_mappability_island 1000 . -7 56437808 56442977 Low_mappability_island 1000 . -7 57253980 57254183 Low_mappability_island 1000 . -7 57255310 57255444 Low_mappability_island 1000 . -7 57261829 57261998 Low_mappability_island 1000 . -7 57544726 57556913 Satellite_repeat 1000 . -7 57811488 57836990 centromeric_repeat 1000 . -7 57939184 58055539 centromeric_repeat 1000 . -7 61054285 62454680 centromeric_repeat 1000 . -7 64059157 64066183 BSR/Beta 1000 . -7 64951348 64956223 centromeric_repeat 1000 . -7 68201468 68201673 Low_mappability_island 1000 . -7 68527370 68527788 LSU-rRNA_Hsa 1000 . -7 80962907 80963147 SSU-rRNA_Hsa 1000 . -7 100550640 100551321 Low_mappability_island 1000 . -7 142372972 142375638 Low_mappability_island 1000 . 
-7 145694403 145694561 Low_mappability_island 1000 . -8 155512 157639 TAR1 1000 . -8 21455971 21456306 LSU-rRNA_Hsa 1000 . -8 32868966 32873279 Low_mappability_island 1000 . -8 43092737 43097573 Satellite_repeat 1000 . -8 43399486 43843604 centromeric_repeat 1000 . -8 46838215 47457541 centromeric_repeat 1000 . -8 47739043 47742797 Low_mappability_island 1000 . -8 47750844 47776101 BSR/Beta 1000 . -8 56754955 56755418 LSU-rRNA_Hsa 1000 . -8 69218401 69218922 LSU-rRNA_Hsa 1000 . -8 70602248 70602620 LSU-rRNA_Hsa 1000 . -8 77114154 77114389 Low_mappability_island 1000 . -8 100508010 100508287 Low_mappability_island 1000 . -9 10435 11574 TAR1 1000 . -9 4799734 4800000 SSU-rRNA_Hsa 1000 . -9 33656606 33659249 Low_mappability_island 1000 . -9 42819021 42832395 centromeric_repeat 1000 . -9 44070617 44070871 Low_mappability_island 1000 . -9 44873123 44902307 centromeric_repeat 1000 . -9 45355954 45357644 telomeric_repeat 1000 . -9 45435109 45443517 centromeric_repeat 1000 . -9 66494170 66494805 TAR1 1000 . -9 66767710 66864329 centromeric_repeat 1000 . -9 66970914 67005594 centromeric_repeat 1000 . -9 67315122 67321036 centromeric_repeat 1000 . -9 67789868 67792893 centromeric_repeat 1000 . -9 68410775 68435115 Low_mappability_island 1000 . -9 69677073 69687998 centromeric_repeat 1000 . -9 69689770 69711497 centromeric_repeat 1000 . -9 69947961 70011196 centromeric_repeat 1000 . -9 70076144 70076855 centromeric_repeat 1000 . -9 70318723 70327683 centromeric_repeat 1000 . -9 72653073 72653572 Satellite_repeat 1000 . -9 78790077 78790255 (GAATG)n 1000 . -9 79186574 79187026 LSU-rRNA_Hsa 1000 . -9 141019938 141021783 TAR1 1000 . -MT 1 16569 chrM 1000 . -X 55206111 55206740 Low_mappability_island 1000 . -X 55207753 55208152 Low_mappability_island 1000 . -X 55208300 55208643 Low_mappability_island 1000 . -X 55208980 55209208 Low_mappability_island 1000 . -X 55209655 55210006 Low_mappability_island 1000 . -X 58330488 58330843 centromeric_repeat 1000 . 
-X 58373806 58373962 centromeric_repeat 1000 . -X 58377680 58377864 centromeric_repeat 1000 . -X 58415350 58416387 centromeric_repeat 1000 . -X 58432411 58432680 centromeric_repeat 1000 . -X 58485887 58486241 centromeric_repeat 1000 . -X 58488898 58494528 centromeric_repeat 1000 . -X 58499466 58504235 centromeric_repeat 1000 . -X 58506076 58528214 centromeric_repeat 1000 . -X 58528184 58536883 centromeric_repeat 1000 . -X 58544061 58582415 centromeric_repeat 1000 . -X 61681834 61919683 centromeric_repeat 1000 . -X 62003205 62041580 centromeric_repeat 1000 . -X 83658929 83659019 Low_mappability_island 1000 . -X 108297348 108297886 LSU-rRNA_Hsa 1000 . -X 114959057 115006437 Low_mappability_island 1000 . -X 125605623 125607351 Low_mappability_island 1000 . -X 125714985 125715338 Low_mappability_island 1000 . -X 125864844 125864980 Low_mappability_island 1000 . -X 125865719 125865874 Low_mappability_island 1000 . -Y 313470 313613 ALR/Alpha 1000 . -Y 3004989 3005175 LSU-rRNA_Hsa 1000 . -Y 4212807 4212910 Low_mappability_island 1000 . -Y 7671817 7694928 BSR/Beta 1000 . -Y 7726064 7730229 BSR/Beta 1000 . -Y 7730734 7731598 BSR/Beta 1000 . -Y 7735811 7752887 BSR/Beta 1000 . -Y 7785067 7806311 BSR/Beta 1000 . -Y 7806856 7814704 BSR/Beta 1000 . -Y 7815230 7820478 BSR/Beta 1000 . -Y 7829937 7832032 BSR/Beta 1000 . -Y 7832744 7848695 BSR/Beta 1000 . -Y 7870343 7873582 BSR/Beta 1000 . -Y 7874115 7874584 BSR/Beta 1000 . -Y 7875409 7885257 BSR/Beta 1000 . -Y 7886545 7894591 BSR/Beta 1000 . -Y 7898927 7916812 BSR/Beta 1000 . -Y 7918790 7921352 BSR/Beta 1000 . -Y 7926344 7936705 BSR/Beta 1000 . -Y 7941130 7947438 BSR/Beta 1000 . -Y 7948790 7964448 BSR/Beta 1000 . -Y 8179010 8181143 BSR/Beta 1000 . -Y 8181757 8213330 BSR/Beta 1000 . -Y 8214629 8215637 BSR/Beta 1000 . -Y 8220421 8230061 BSR/Beta 1000 . -Y 8230686 8231546 BSR/Beta 1000 . -Y 8240772 8265916 BSR/Beta 1000 . -Y 8291535 8292942 BSR/Beta 1000 . -Y 8294002 8295175 BSR/Beta 1000 . -Y 8296944 8321375 BSR/Beta 1000 . 
-Y 8325813 8325929 BSR/Beta 1000 . -Y 8326678 8333466 BSR/Beta 1000 . -Y 8334027 8342387 BSR/Beta 1000 . -Y 8356544 8369346 BSR/Beta 1000 . -Y 8909560 8909925 TAR1 1000 . -Y 8979478 8979585 Low_mappability_island 1000 . -Y 9072781 9072993 TAR1 1000 . -Y 9908430 9925608 centromeric_repeat 1000 . -Y 9981952 9982126 BSR/Beta 1000 . -Y 10034864 10036712 SSU-rRNA_Hsa 1000 . -Y 10040627 10045657 ALR/Alpha 1000 . -Y 10047773 10052533 ALR/Alpha 1000 . -Y 10053695 10057722 ALR/Alpha 1000 . -Y 10059394 10073694 ALR/Alpha 1000 . -Y 10075082 10075781 ALR/Alpha 1000 . -Y 10080736 10104539 ALR/Alpha 1000 . -Y 13104530 13144368 centromeric_repeat 1000 . -Y 13193966 13196535 Low_mappability_island 1000 . -Y 13252193 13259484 centromeric_repeat 1000 . -Y 13290177 13290667 chrM 1000 . -Y 13445957 13490591 Satellite_repeat 1000 . -Y 13642186 13749784 Satellite_repeat 1000 . -Y 13798522 13870984 Satellite_repeat 1000 . -Y 19691913 19692524 LSU-rRNA_Hsa 1000 . -Y 19764063 19776198 ALR/Alpha 1000 . -Y 19780600 19781704 ALR/Alpha 1000 . -Y 19783669 19796396 ALR/Alpha 1000 . -Y 19800068 19801419 ALR/Alpha 1000 . -Y 19808085 19817100 ALR/Alpha 1000 . -Y 19944298 19944581 TAR1 1000 . -Y 20235195 20235478 TAR1 1000 . -Y 20362679 20371694 ALR/Alpha 1000 . -Y 20378360 20379711 ALR/Alpha 1000 . -Y 20383383 20396110 ALR/Alpha 1000 . -Y 20398075 20399179 ALR/Alpha 1000 . -Y 20403581 20415713 ALR/Alpha 1000 . -Y 20487248 20487859 LSU-rRNA_Hsa 1000 . -Y 23124788 23125577 BSR/Beta 1000 . -Y 23149027 23151205 BSR/Beta 1000 . -Y 23157969 23158245 BSR/Beta 1000 . -Y 23159001 23167737 BSR/Beta 1000 . -Y 23178886 23181770 BSR/Beta 1000 . -Y 23220740 23223625 BSR/Beta 1000 . -Y 23234125 23235822 BSR/Beta 1000 . -Y 23236898 23248080 BSR/Beta 1000 . -Y 23248729 23248851 BSR/Beta 1000 . -Y 23899295 23899388 TAR1 1000 . -Y 23956449 23956628 TAR1 1000 . -Y 24247659 24247700 TAR1 1000 . -Y 24630999 24631040 TAR1 1000 . -Y 24953159 24975657 BSR/Beta 1000 . -Y 24980997 24991235 BSR/Beta 1000 . 
-Y 25022753 25039185 BSR/Beta 1000 . -Y 25040153 25042421 BSR/Beta 1000 . -Y 25048332 25059258 BSR/Beta 1000 . -Y 25060235 25064798 BSR/Beta 1000 . -Y 25099139 25121882 BSR/Beta 1000 . -Y 25122419 25160800 BSR/Beta 1000 . -Y 25182404 25192372 BSR/Beta 1000 . -Y 25217722 25219409 BSR/Beta 1000 . -Y 25493588 25495275 BSR/Beta 1000 . -Y 26148315 26148450 TAR1 1000 . -Y 26586905 26609405 BSR/Beta 1000 . -Y 26614745 26624983 BSR/Beta 1000 . -Y 26656502 26672934 BSR/Beta 1000 . -Y 26673902 26676170 BSR/Beta 1000 . -Y 26682081 26693007 BSR/Beta 1000 . -Y 26693984 26698547 BSR/Beta 1000 . -Y 26732883 26755623 BSR/Beta 1000 . -Y 26756160 26794538 BSR/Beta 1000 . -Y 26816148 26826116 BSR/Beta 1000 . -Y 26851466 26853153 BSR/Beta 1000 . -Y 27109247 27110934 BSR/Beta 1000 . -Y 27136281 27146249 BSR/Beta 1000 . -Y 27167859 27206241 BSR/Beta 1000 . -Y 27206778 27229502 BSR/Beta 1000 . -Y 27263848 27268411 BSR/Beta 1000 . -Y 27269388 27280315 BSR/Beta 1000 . -Y 27286226 27288494 BSR/Beta 1000 . -Y 27289462 27305895 BSR/Beta 1000 . -Y 27337415 27347656 BSR/Beta 1000 . -Y 27352996 27375497 BSR/Beta 1000 . -Y 27813984 27814119 TAR1 1000 . -Y 28555026 28555353 TAR1 1000 . -Y 28784129 28819695 Satellite_repeat 1000 . -Y 58819367 58917648 (CATTC)n 1000 . -Y 58971913 58997782 (CATTC)n 1000 . -Y 59361267 59362785 TAR1 1000 . 
diff --git a/assets/blacklists/GRCm38-blacklist.bed b/assets/blacklists/GRCm38-blacklist.bed deleted file mode 100755 index 3699d020a..000000000 --- a/assets/blacklists/GRCm38-blacklist.bed +++ /dev/null @@ -1,164 +0,0 @@ -10 3110060 3110270 -10 22142530 22142880 -10 22142830 22143070 -10 58223870 58224100 -10 58225260 58225500 -10 58228320 58228520 -11 3148660 3148860 -11 3154960 3155170 -11 3158530 3158750 -11 3161780 3161990 -11 3167020 3167250 -11 3169390 3169620 -11 3172450 3172670 -11 3172950 3173190 -11 3184190 3185750 -11 3185700 3186360 -11 3186330 3189230 -11 3189190 3190740 -11 3190750 3191000 -11 3190960 3194430 -11 3194400 3195310 -11 3195240 3197220 -11 3197340 3197950 -11 3197890 3198700 -11 3198630 3199440 -11 3199350 3200120 -11 54139940 54140230 -11 54140470 54140740 -11 88967720 88969600 -11 88969850 88970350 -11 109011550 109012090 -12 3109920 3110150 -12 105436040 105436270 -13 3372960 3373380 -13 3373410 3373630 -13 77438870 77439090 -13 97190460 97190690 -13 99790830 99791090 -13 119488570 119489320 -13 119597600 119598320 -13 119599860 119600050 -13 119601360 119601600 -13 119601800 119602210 -13 119602360 119602580 -13 119609430 119611430 -13 119612760 119613370 -13 119613360 119617690 -14 19415650 19417330 -14 19417240 19417660 -14 19417570 19418920 -14 19418830 19419720 -14 47454330 47454510 -15 75085430 75085920 -15 75085990 75086240 -15 75086150 75086550 -15 75086540 75087110 -16 11143960 11144170 -16 57391420 57391740 -17 13305860 13306280 -17 13590820 13591650 -17 13654880 13655120 -17 36231170 36231390 -17 39842910 39846780 -17 39846920 39847160 -17 39847090 39847310 -17 39847400 39847720 -17 39847630 39848880 -18 3005550 3005770 -18 3005700 3006050 -18 12949190 12949400 -18 40307970 40308340 -18 68691990 68692230 -19 45650030 45650310 -19 61199640 61199880 -19 61224310 61224530 -19 61266550 61266760 -19 61266920 61267210 -1 24612620 24612850 -1 48881430 48881690 -1 58613870 58614090 -1 78573920 78574140 -1 88217960 88221950 -1 
88223300 88224760 -1 133595120 133595340 -1 183299040 183299660 -1 195241610 195241820 -2 3050030 3050410 -2 5379200 5379420 -2 22743580 22743780 -2 22744760 22744980 -2 90395030 90395240 -2 98662130 98663060 -2 98663540 98664150 -2 98664780 98665020 -2 98664970 98665250 -2 98666140 98667390 -2 181917260 181917590 -2 181917550 181917990 -2 181918970 181919260 -2 181928340 181928570 -2 181928950 181929170 -2 181929220 181929430 -2 181930800 181931020 -3 5860530 5860830 -3 8245690 8245930 -3 8246280 8246640 -4 34935690 34935910 -4 70378040 70378320 -4 118548460 118548700 -5 14914900 14915120 -5 15006590 15006820 -5 15462500 15462730 -5 15463060 15463290 -5 15486990 15487190 -5 134378920 134379160 -5 137152130 137152510 -5 146260900 146261410 -6 3201380 3201610 -6 103648970 103649310 -7 12010340 12010870 -8 14306800 14307040 -8 15519790 15520030 -8 19711890 19712070 -9 2999900 3000320 -9 3000270 3000570 -9 3000900 3001100 -9 3001300 3001520 -9 3004390 3004680 -9 3004690 3004900 -9 3005000 3005220 -9 3005800 3006030 -9 3006960 3007180 -9 3008880 3009040 -9 3015170 3015420 -9 3015590 3015830 -9 3016770 3016980 -9 3017410 3017650 -9 3018240 3018540 -9 3018650 3018870 -9 3019220 3019450 -9 3021160 3021370 -9 3021990 3022300 -9 3024660 3024880 -9 3025350 3025690 -9 3026530 3026860 -9 3027010 3027250 -9 3027660 3027880 -9 3028670 3028880 -9 3030040 3030330 -9 3031910 3032130 -9 3032250 3032560 -9 3032570 3032790 -9 3034090 3034300 -9 3034950 3035160 -9 3035610 3036180 -9 3036200 3036480 -9 3036420 3036660 -9 3037250 3037460 -9 3037910 3038120 -9 3038050 3038300 -9 24541940 24542200 -9 35305120 35305620 -9 110281190 110281400 -9 123872950 123873160 diff --git a/assets/blacklists/hg38-blacklist.bed b/assets/blacklists/hg38-blacklist.bed deleted file mode 100644 index c3e00911c..000000000 --- a/assets/blacklists/hg38-blacklist.bed +++ /dev/null @@ -1,38 +0,0 @@ -chr10 38528030 38529790 -chr10 42070420 42070660 -chr16 34571420 34571640 -chr16 34572700 34572930 -chr16 34584530 
34584840 -chr16 34585000 34585220 -chr16 34585700 34586380 -chr16 34586660 34587100 -chr16 34587060 34587660 -chr16 34587900 34588170 -chr16 34593000 34593590 -chr16 34594490 34594720 -chr16 34594900 34595150 -chr16 34595320 34595570 -chr16 46380910 46381140 -chr16 46386270 46386530 -chr16 46390180 46390930 -chr16 46394370 46395100 -chr16 46395670 46395910 -chr16 46398780 46399020 -chr16 46400700 46400970 -chr1 124450730 124450960 -chr20 28513520 28513770 -chr20 31060210 31060770 -chr20 31061050 31061560 -chr20 31063990 31064490 -chr20 31067930 31069060 -chr20 31069000 31069280 -chr21 8219780 8220120 -chr21 8234330 8234620 -chr2 90397520 90397900 -chr2 90398120 90398760 -chr3 93470260 93470870 -chr4 49118760 49119010 -chr4 49120790 49121130 -chr5 49601430 49602300 -chr5 49657080 49657690 -chr5 49661330 49661570 diff --git a/assets/blacklists/mm10-blacklist.bed b/assets/blacklists/mm10-blacklist.bed deleted file mode 100755 index 8c71cd3ba..000000000 --- a/assets/blacklists/mm10-blacklist.bed +++ /dev/null @@ -1,164 +0,0 @@ -chr10 3110060 3110270 -chr10 22142530 22142880 -chr10 22142830 22143070 -chr10 58223870 58224100 -chr10 58225260 58225500 -chr10 58228320 58228520 -chr11 3148660 3148860 -chr11 3154960 3155170 -chr11 3158530 3158750 -chr11 3161780 3161990 -chr11 3167020 3167250 -chr11 3169390 3169620 -chr11 3172450 3172670 -chr11 3172950 3173190 -chr11 3184190 3185750 -chr11 3185700 3186360 -chr11 3186330 3189230 -chr11 3189190 3190740 -chr11 3190750 3191000 -chr11 3190960 3194430 -chr11 3194400 3195310 -chr11 3195240 3197220 -chr11 3197340 3197950 -chr11 3197890 3198700 -chr11 3198630 3199440 -chr11 3199350 3200120 -chr11 54139940 54140230 -chr11 54140470 54140740 -chr11 88967720 88969600 -chr11 88969850 88970350 -chr11 109011550 109012090 -chr12 3109920 3110150 -chr12 105436040 105436270 -chr13 3372960 3373380 -chr13 3373410 3373630 -chr13 77438870 77439090 -chr13 97190460 97190690 -chr13 99790830 99791090 -chr13 119488570 119489320 -chr13 119597600 119598320 
-chr13 119599860 119600050 -chr13 119601360 119601600 -chr13 119601800 119602210 -chr13 119602360 119602580 -chr13 119609430 119611430 -chr13 119612760 119613370 -chr13 119613360 119617690 -chr14 19415650 19417330 -chr14 19417240 19417660 -chr14 19417570 19418920 -chr14 19418830 19419720 -chr14 47454330 47454510 -chr15 75085430 75085920 -chr15 75085990 75086240 -chr15 75086150 75086550 -chr15 75086540 75087110 -chr16 11143960 11144170 -chr16 57391420 57391740 -chr17 13305860 13306280 -chr17 13590820 13591650 -chr17 13654880 13655120 -chr17 36231170 36231390 -chr17 39842910 39846780 -chr17 39846920 39847160 -chr17 39847090 39847310 -chr17 39847400 39847720 -chr17 39847630 39848880 -chr18 3005550 3005770 -chr18 3005700 3006050 -chr18 12949190 12949400 -chr18 40307970 40308340 -chr18 68691990 68692230 -chr19 45650030 45650310 -chr19 61199640 61199880 -chr19 61224310 61224530 -chr19 61266550 61266760 -chr19 61266920 61267210 -chr1 24612620 24612850 -chr1 48881430 48881690 -chr1 58613870 58614090 -chr1 78573920 78574140 -chr1 88217960 88221950 -chr1 88223300 88224760 -chr1 133595120 133595340 -chr1 183299040 183299660 -chr1 195241610 195241820 -chr2 3050030 3050410 -chr2 5379200 5379420 -chr2 22743580 22743780 -chr2 22744760 22744980 -chr2 90395030 90395240 -chr2 98662130 98663060 -chr2 98663540 98664150 -chr2 98664780 98665020 -chr2 98664970 98665250 -chr2 98666140 98667390 -chr2 181917260 181917590 -chr2 181917550 181917990 -chr2 181918970 181919260 -chr2 181928340 181928570 -chr2 181928950 181929170 -chr2 181929220 181929430 -chr2 181930800 181931020 -chr3 5860530 5860830 -chr3 8245690 8245930 -chr3 8246280 8246640 -chr4 34935690 34935910 -chr4 70378040 70378320 -chr4 118548460 118548700 -chr5 14914900 14915120 -chr5 15006590 15006820 -chr5 15462500 15462730 -chr5 15463060 15463290 -chr5 15486990 15487190 -chr5 134378920 134379160 -chr5 137152130 137152510 -chr5 146260900 146261410 -chr6 3201380 3201610 -chr6 103648970 103649310 -chr7 12010340 12010870 -chr8 14306800 
14307040 -chr8 15519790 15520030 -chr8 19711890 19712070 -chr9 2999900 3000320 -chr9 3000270 3000570 -chr9 3000900 3001100 -chr9 3001300 3001520 -chr9 3004390 3004680 -chr9 3004690 3004900 -chr9 3005000 3005220 -chr9 3005800 3006030 -chr9 3006960 3007180 -chr9 3008880 3009040 -chr9 3015170 3015420 -chr9 3015590 3015830 -chr9 3016770 3016980 -chr9 3017410 3017650 -chr9 3018240 3018540 -chr9 3018650 3018870 -chr9 3019220 3019450 -chr9 3021160 3021370 -chr9 3021990 3022300 -chr9 3024660 3024880 -chr9 3025350 3025690 -chr9 3026530 3026860 -chr9 3027010 3027250 -chr9 3027660 3027880 -chr9 3028670 3028880 -chr9 3030040 3030330 -chr9 3031910 3032130 -chr9 3032250 3032560 -chr9 3032570 3032790 -chr9 3034090 3034300 -chr9 3034950 3035160 -chr9 3035610 3036180 -chr9 3036200 3036480 -chr9 3036420 3036660 -chr9 3037250 3037460 -chr9 3037910 3038120 -chr9 3038050 3038300 -chr9 24541940 24542200 -chr9 35305120 35305620 -chr9 110281190 110281400 -chr9 123872950 123873160 diff --git a/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed b/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed new file mode 100644 index 000000000..b87063023 --- /dev/null +++ b/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed @@ -0,0 +1,411 @@ +1 564449 570371 High_Mappability_island 1000 . +1 724136 727043 Satellite_repeat 1000 . +1 825006 825115 BSR/Beta 1000 . +1 2583334 2634374 Low_mappability_island 1000 . +1 4363064 4363242 (CATTC)n 1000 . +1 5725866 5736651 Low_mappability_island 1000 . +1 16839923 16841396 Low_mappability_island 1000 . +1 38077347 38077423 Low_mappability_island 1000 . +1 91852785 91853147 LSU-rRNA_Hsa 1000 . +1 104163724 104163860 Low_mappability_island 1000 . +1 108112972 108113707 LSU-rRNA_Hsa 1000 . +1 121351474 121487059 centromeric_repeat 1000 . +1 142535434 142543081 Satellite_repeat 1000 . +1 142723256 142723968 Low_mappability_island 1000 . +1 142792613 142793303 Low_mappability_island 1000 . +1 142835822 142837333 Low_mappability_island 1000 . 
+1 143274490 143284340 centromeric_repeat 1000 . +1 145277108 145277572 LSU-rRNA_Hsa 1000 . +1 149033183 149035829 Satellite_repeat 1000 . +1 156186169 156186712 High_Mappability_island 1000 . +1 224199390 224204260 Satellite_repeat 1000 . +1 233318467 233318516 (CATTC)n 1000 . +1 236260366 236260821 Low_mappability_island 1000 . +1 237766308 237766764 LSU-rRNA_Hsa 1000 . +1 238105345 238105511 Low_mappability_island 1000 . +1 238108025 238108378 Low_mappability_island 1000 . +1 238108645 238109697 Low_mappability_island 1000 . +10 18841533 18862467 (CATTC)n 1000 . +10 20035661 20037171 Low_mappability_island 1000 . +10 36722282 36723650 Low_mappability_island 1000 . +10 38772277 38819357 Satellite_repeat 1000 . +10 38868892 38889025 Satellite_repeat 1000 . +10 39076515 39155771 Satellite_repeat 1000 . +10 42354835 42548642 centromeric_repeat 1000 . +10 42596676 42602082 Satellite_repeat 1000 . +10 42596700 42602110 Satellite_repeat 1000 . +10 42661264 42667623 Satellite_repeat 1000 . +10 42790522 42818398 Satellite_repeat 1000 . +10 135498649 135502716 Satellite_repeat 1000 . +11 6831669 6831838 ALR/Alpha 1000 . +11 10529403 10531969 Low_mappability_island 1000 . +11 48671444 48902406 centromeric_repeat 1000 . +11 48931242 48964015 centromeric_repeat 1000 . +11 50318471 50784078 centromeric_repeat 1000 . +11 51090700 51374066 centromeric_repeat 1000 . +11 51567242 51594226 centromeric_repeat 1000 . +11 54694046 55027975 centromeric_repeat 1000 . +11 73221660 73221946 Low_mappability_island 1000 . +11 85194913 85195322 LSU-rRNA_Hsa 1000 . +11 87524468 87525005 Low_mappability_island 1000 . +11 103275584 103281729 Low_mappability_island 1000 . +11 122874287 122874443 Low_mappability_island 1000 . +12 20704285 20704583 SSU-rRNA_Hsa 1000 . +12 34372315 34372825 LSU-rRNA_Hsa 1000 . +12 34432130 34857010 centromeric_repeat 1000 . +12 37989447 38441828 centromeric_repeat 1000 . +12 38531376 38531930 LSU-rRNA_Hsa 1000 . +12 41757383 41757545 Low_mappability_island 1000 . 
+12 127650407 127651075 LSU-rRNA_Hsa 1000 . +12 132061320 132062046 Low_mappability_island 1000 . +13 56545728 56545925 Low_mappability_island 1000 . +13 110076444 110076782 Low_mappability_island 1000 . +14 18999935 19056900 centromeric_repeat 1000 . +14 32953263 32954381 Low_mappability_island 1000 . +14 84637832 84639038 Low_mappability_island 1000 . +14 90341302 90341516 SSU-rRNA_Hsa 1000 . +15 19999941 20044132 centromeric_repeat 1000 . +16 32493036 32570826 ALR/Alpha 1000 . +16 32590063 32598801 ALR/Alpha 1000 . +16 33237130 33241330 Low_mappability_island 1000 . +16 33864355 34023306 centromeric_repeat 1000 . +16 34180542 34197081 Satellite_repeat 1000 . +16 34530115 34542632 BSR/Beta 1000 . +16 35193580 35285885 centromeric_repeat 1000 . +16 46385718 46456668 Satellite_repeat 1000 . +16 46497639 46500515 Satellite_repeat 1000 . +16 47538629 47539297 LSU-rRNA_Hsa 1000 . +17 19355538 19356096 LSU-rRNA_Hsa 1000 . +17 19502495 19506773 Low_mappability_island 1000 . +17 21905167 21906712 centromeric_repeat 1000 . +17 22018524 22032049 Low_mappability_island 1000 . +17 22221073 22263006 centromeric_repeat 1000 . +17 25263010 25268059 Satellite_repeat 1000 . +17 25415551 25417559 telomeric_repeat 1000 . +17 31149365 31149981 High_Mappability_island 1000 . +17 33478114 33478372 LSU-rRNA_Hsa 1000 . +17 41381502 41382591 High_Mappability_island 1000 . +17 41463538 41464075 High_Mappability_island 1000 . +17 41464478 41465015 snRNA 1000 . +17 41465562 41467288 High_Mappability_island 1000 . +17 51183038 51183763 Low_mappability_island 1000 . +17 55868618 55868752 LSU-rRNA_Hsa 1000 . +17 75158031 75158430 LSU-rRNA_Hsa 1000 . +18 96416 97552 Satellite_repeat 1000 . +18 105658 112233 Satellite_repeat 1000 . +18 2842252 2842356 Low_mappability_island 1000 . +18 15393801 15393992 centromeric_repeat 1000 . +18 18510894 18520356 centromeric_repeat 1000 . +18 44126235 44126593 (CATTC)n 1000 . +18 45379603 45379864 Low_mappability_island 1000 . 
+18 50319086 50319301 Low_mappability_island 1000 . +18 77772846 77773065 LSU-rRNA_Hsa 1000 . +19 246006 247844 TAR1 1000 . +19 22877614 22877696 SSU-rRNA_Hsa 1000 . +19 23235030 23235504 BSR/Beta 1000 . +19 24182398 24186210 LSU-rRNA_Hsa 1000 . +19 24385474 24633168 centromeric_repeat 1000 . +19 27730611 28262682 centromeric_repeat 1000 . +19 36066445 36066810 LSU-rRNA_Hsa 1000 . +19 36756398 36800948 centromeric_repeat 1000 . +19 37759473 37797722 centromeric_repeat 1000 . +19 44914313 44916340 ACRO1 1000 . +19 44960681 44962681 ACRO1 1000 . +2 739925 740994 Low_mappability_island 1000 . +2 49456729 49457067 Low_mappability_island 1000 . +2 88124390 88124903 Low_mappability_island 1000 . +2 89830421 89880514 Satellite_repeat 1000 . +2 90371401 90394776 Satellite_repeat 1000 . +2 90443001 90545431 Low_mappability_island 1000 . +2 91595080 91616015 Satellite_repeat 1000 . +2 92267428 92326280 centromeric_repeat 1000 . +2 115695017 115695281 LSU-rRNA_Hsa 1000 . +2 117781085 117781300 Low_mappability_island 1000 . +2 132966248 132989300 centromeric_repeat 1000 . +2 132994855 133007983 ALR/Alpha 1000 . +2 133011824 133013298 SSU-rRNA_Hsa 1000 . +2 133036250 133040042 LSU-rRNA_Hsa 1000 . +2 133044095 133045945 ACRO1 1000 . +2 143848503 143848792 Low_mappability_island 1000 . +2 148022736 148022878 Low_mappability_island 1000 . +2 149639207 149639515 Low_mappability_island 1000 . +2 156120500 156120610 Low_mappability_island 1000 . +2 162135000 162139241 Low_mappability_island 1000 . +2 230045426 230045796 LSU-rRNA_Hsa 1000 . +20 26257032 26320267 centromeric_repeat 1000 . +20 29517710 29521147 centromeric_repeat 1000 . +20 29803876 29833334 centromeric_repeat 1000 . +20 55932703 55936114 chrM 1000 . +20 62916702 62918053 telomeric_repeat 1000 . +21 9647205 9648529 Low_mappability_island 1000 . +21 9694896 9704962 centromeric_repeat 1000 . +21 9825451 9827612 High_Mappability_island 1000 . +21 9827612 9845233 Low_mappability_island 1000 . +21 9881895 9882569 TAR1 1000 . 
+21 10084922 10088004 Satellite_repeat 1000 . +21 10492876 10493049 Low_mappability_island 1000 . +21 10599428 10599915 TAR1 1000 . +21 10697886 10860890 centromeric_repeat 1000 . +21 11186054 11188131 Satellite_repeat 1000 . +21 14338127 14369791 centromeric_repeat 1000 . +21 18800575 18800997 (GAGTG)n 1000 . +21 27228003 27228242 SSU-rRNA_Hsa 1000 . +21 46796081 46796336 Low_mappability_island 1000 . +22 16847814 16862659 Satellite_repeat 1000 . +22 18876789 18884510 Satellite_repeat 1000 . +3 25508897 25509131 Low_mappability_island 1000 . +3 73159606 73161131 snRNA 1000 . +3 75696297 75699304 BSR/Beta 1000 . +3 75717841 75720426 Satellite_repeat 1000 . +3 80995858 81014459 ALR/Alpha 1000 . +3 90311686 90507410 centromeric_repeat 1000 . +3 93504815 93519133 centromeric_repeat 1000 . +3 96335934 96337436 Low_mappability_island 1000 . +3 160665423 160665642 Low_mappability_island 1000 . +3 196625514 196625860 Satellite_repeat 1000 . +3 197825427 197834080 Low_mappability_island 1000 . +4 9987 12694 telomeric_repeat 1000 . +4 12276463 12292424 ALR/Alpha 1000 . +4 12641862 12642305 Low_mappability_island 1000 . +4 21583630 21583719 (GAATG)n 1000 . +4 27732004 27732240 Low_mappability_island 1000 . +4 47774268 47774416 Low_mappability_island 1000 . +4 49085372 49342114 centromeric_repeat 1000 . +4 49488472 49662085 centromeric_repeat 1000 . +4 52659961 52688986 centromeric_repeat 1000 . +4 56194229 56194584 Low_mappability_island 1000 . +4 65473858 65473941 Low_mappability_island 1000 . +4 68264186 68266830 centromeric_repeat 1000 . +4 70296565 70296841 LSU-rRNA_Hsa 1000 . +4 76807083 76807320 LSU-rRNA_Hsa 1000 . +4 78929660 78929920 Low_mappability_island 1000 . +4 156374749 156377226 chrM 1000 . +4 156384860 156387314 Low_mappability_island 1000 . +4 163342479 163342744 Low_mappability_island 1000 . +4 190190746 190203442 Low_mappability_island 1000 . +4 190801869 190802909 Low_mappability_island 1000 . +4 190943802 190943962 Satellite_repeat 1000 . 
+4 190987268 190990949 Satellite_repeat 1000 . +4 191026302 191044344 telomeric_repeat 1000 . +5 17517177 17600940 Low_mappability_island 1000 . +5 21477365 21497415 Low_mappability_island 1000 . +5 34177882 34197574 Low_mappability_island 1000 . +5 45908253 46411114 centromeric_repeat 1000 . +5 49405493 49554574 centromeric_repeat 1000 . +5 71146650 71146996 LSU-rRNA_Hsa 1000 . +5 79945807 79948223 Low_mappability_island 1000 . +5 93903068 93906726 Low_mappability_island 1000 . +5 97746525 97746679 Low_mappability_island 1000 . +5 99381556 99390873 Low_mappability_island 1000 . +5 105889063 105889263 chrM 1000 . +5 123095972 123097432 chrM 1000 . +5 134258949 134264271 Low_mappability_island 1000 . +5 174541634 174542177 SSU-rRNA_Hsa 1000 . +6 58735349 58739031 centromeric_repeat 1000 . +6 58745955 58780547 centromeric_repeat 1000 . +6 61880095 61944008 centromeric_repeat 1000 . +6 62189892 62206612 ALR/Alpha 1000 . +6 62207809 62230644 ALR/Alpha 1000 . +6 62283966 62284581 Low_mappability_island 1000 . +6 133593944 133594201 LSU-rRNA_Hsa 1000 . +6 137059142 137059326 SSU-rRNA_Hsa 1000 . +6 150665074 150665281 SSU-rRNA_Hsa 1000 . +6 157731310 157735525 Low_mappability_island 1000 . +7 43878355 43878530 TAR1 1000 . +7 45291517 45291740 Low_mappability_island 1000 . +7 56437808 56442977 Low_mappability_island 1000 . +7 57253980 57254183 Low_mappability_island 1000 . +7 57255310 57255444 Low_mappability_island 1000 . +7 57261829 57261998 Low_mappability_island 1000 . +7 57544726 57556913 Satellite_repeat 1000 . +7 57811488 57836990 centromeric_repeat 1000 . +7 57939184 58055539 centromeric_repeat 1000 . +7 61054285 62454680 centromeric_repeat 1000 . +7 64059157 64066183 BSR/Beta 1000 . +7 64951348 64956223 centromeric_repeat 1000 . +7 68201468 68201673 Low_mappability_island 1000 . +7 68527370 68527788 LSU-rRNA_Hsa 1000 . +7 80962907 80963147 SSU-rRNA_Hsa 1000 . +7 100550640 100551321 Low_mappability_island 1000 . +7 142372972 142375638 Low_mappability_island 1000 . 
+7 145694403 145694561 Low_mappability_island 1000 . +8 155512 157639 TAR1 1000 . +8 21455971 21456306 LSU-rRNA_Hsa 1000 . +8 32868966 32873279 Low_mappability_island 1000 . +8 43092737 43097573 Satellite_repeat 1000 . +8 43399486 43843604 centromeric_repeat 1000 . +8 46838215 47457541 centromeric_repeat 1000 . +8 47739043 47742797 Low_mappability_island 1000 . +8 47750844 47776101 BSR/Beta 1000 . +8 56754955 56755418 LSU-rRNA_Hsa 1000 . +8 69218401 69218922 LSU-rRNA_Hsa 1000 . +8 70602248 70602620 LSU-rRNA_Hsa 1000 . +8 77114154 77114389 Low_mappability_island 1000 . +8 100508010 100508287 Low_mappability_island 1000 . +9 10435 11574 TAR1 1000 . +9 4799734 4800000 SSU-rRNA_Hsa 1000 . +9 33656606 33659249 Low_mappability_island 1000 . +9 42819021 42832395 centromeric_repeat 1000 . +9 44070617 44070871 Low_mappability_island 1000 . +9 44873123 44902307 centromeric_repeat 1000 . +9 45355954 45357644 telomeric_repeat 1000 . +9 45435109 45443517 centromeric_repeat 1000 . +9 66494170 66494805 TAR1 1000 . +9 66767710 66864329 centromeric_repeat 1000 . +9 66970914 67005594 centromeric_repeat 1000 . +9 67315122 67321036 centromeric_repeat 1000 . +9 67789868 67792893 centromeric_repeat 1000 . +9 68410775 68435115 Low_mappability_island 1000 . +9 69677073 69687998 centromeric_repeat 1000 . +9 69689770 69711497 centromeric_repeat 1000 . +9 69947961 70011196 centromeric_repeat 1000 . +9 70076144 70076855 centromeric_repeat 1000 . +9 70318723 70327683 centromeric_repeat 1000 . +9 72653073 72653572 Satellite_repeat 1000 . +9 78790077 78790255 (GAATG)n 1000 . +9 79186574 79187026 LSU-rRNA_Hsa 1000 . +9 141019938 141021783 TAR1 1000 . +MT 1 16569 chrM 1000 . +X 55206111 55206740 Low_mappability_island 1000 . +X 55207753 55208152 Low_mappability_island 1000 . +X 55208300 55208643 Low_mappability_island 1000 . +X 55208980 55209208 Low_mappability_island 1000 . +X 55209655 55210006 Low_mappability_island 1000 . +X 58330488 58330843 centromeric_repeat 1000 . 
+X 58373806 58373962 centromeric_repeat 1000 . +X 58377680 58377864 centromeric_repeat 1000 . +X 58415350 58416387 centromeric_repeat 1000 . +X 58432411 58432680 centromeric_repeat 1000 . +X 58485887 58486241 centromeric_repeat 1000 . +X 58488898 58494528 centromeric_repeat 1000 . +X 58499466 58504235 centromeric_repeat 1000 . +X 58506076 58528214 centromeric_repeat 1000 . +X 58528184 58536883 centromeric_repeat 1000 . +X 58544061 58582415 centromeric_repeat 1000 . +X 61681834 61919683 centromeric_repeat 1000 . +X 62003205 62041580 centromeric_repeat 1000 . +X 83658929 83659019 Low_mappability_island 1000 . +X 108297348 108297886 LSU-rRNA_Hsa 1000 . +X 114959057 115006437 Low_mappability_island 1000 . +X 125605623 125607351 Low_mappability_island 1000 . +X 125714985 125715338 Low_mappability_island 1000 . +X 125864844 125864980 Low_mappability_island 1000 . +X 125865719 125865874 Low_mappability_island 1000 . +Y 313470 313613 ALR/Alpha 1000 . +Y 3004989 3005175 LSU-rRNA_Hsa 1000 . +Y 4212807 4212910 Low_mappability_island 1000 . +Y 7671817 7694928 BSR/Beta 1000 . +Y 7726064 7730229 BSR/Beta 1000 . +Y 7730734 7731598 BSR/Beta 1000 . +Y 7735811 7752887 BSR/Beta 1000 . +Y 7785067 7806311 BSR/Beta 1000 . +Y 7806856 7814704 BSR/Beta 1000 . +Y 7815230 7820478 BSR/Beta 1000 . +Y 7829937 7832032 BSR/Beta 1000 . +Y 7832744 7848695 BSR/Beta 1000 . +Y 7870343 7873582 BSR/Beta 1000 . +Y 7874115 7874584 BSR/Beta 1000 . +Y 7875409 7885257 BSR/Beta 1000 . +Y 7886545 7894591 BSR/Beta 1000 . +Y 7898927 7916812 BSR/Beta 1000 . +Y 7918790 7921352 BSR/Beta 1000 . +Y 7926344 7936705 BSR/Beta 1000 . +Y 7941130 7947438 BSR/Beta 1000 . +Y 7948790 7964448 BSR/Beta 1000 . +Y 8179010 8181143 BSR/Beta 1000 . +Y 8181757 8213330 BSR/Beta 1000 . +Y 8214629 8215637 BSR/Beta 1000 . +Y 8220421 8230061 BSR/Beta 1000 . +Y 8230686 8231546 BSR/Beta 1000 . +Y 8240772 8265916 BSR/Beta 1000 . +Y 8291535 8292942 BSR/Beta 1000 . +Y 8294002 8295175 BSR/Beta 1000 . +Y 8296944 8321375 BSR/Beta 1000 . 
+Y 8325813 8325929 BSR/Beta 1000 . +Y 8326678 8333466 BSR/Beta 1000 . +Y 8334027 8342387 BSR/Beta 1000 . +Y 8356544 8369346 BSR/Beta 1000 . +Y 8909560 8909925 TAR1 1000 . +Y 8979478 8979585 Low_mappability_island 1000 . +Y 9072781 9072993 TAR1 1000 . +Y 9908430 9925608 centromeric_repeat 1000 . +Y 9981952 9982126 BSR/Beta 1000 . +Y 10034864 10036712 SSU-rRNA_Hsa 1000 . +Y 10040627 10045657 ALR/Alpha 1000 . +Y 10047773 10052533 ALR/Alpha 1000 . +Y 10053695 10057722 ALR/Alpha 1000 . +Y 10059394 10073694 ALR/Alpha 1000 . +Y 10075082 10075781 ALR/Alpha 1000 . +Y 10080736 10104539 ALR/Alpha 1000 . +Y 13104530 13144368 centromeric_repeat 1000 . +Y 13193966 13196535 Low_mappability_island 1000 . +Y 13252193 13259484 centromeric_repeat 1000 . +Y 13290177 13290667 chrM 1000 . +Y 13445957 13490591 Satellite_repeat 1000 . +Y 13642186 13749784 Satellite_repeat 1000 . +Y 13798522 13870984 Satellite_repeat 1000 . +Y 19691913 19692524 LSU-rRNA_Hsa 1000 . +Y 19764063 19776198 ALR/Alpha 1000 . +Y 19780600 19781704 ALR/Alpha 1000 . +Y 19783669 19796396 ALR/Alpha 1000 . +Y 19800068 19801419 ALR/Alpha 1000 . +Y 19808085 19817100 ALR/Alpha 1000 . +Y 19944298 19944581 TAR1 1000 . +Y 20235195 20235478 TAR1 1000 . +Y 20362679 20371694 ALR/Alpha 1000 . +Y 20378360 20379711 ALR/Alpha 1000 . +Y 20383383 20396110 ALR/Alpha 1000 . +Y 20398075 20399179 ALR/Alpha 1000 . +Y 20403581 20415713 ALR/Alpha 1000 . +Y 20487248 20487859 LSU-rRNA_Hsa 1000 . +Y 23124788 23125577 BSR/Beta 1000 . +Y 23149027 23151205 BSR/Beta 1000 . +Y 23157969 23158245 BSR/Beta 1000 . +Y 23159001 23167737 BSR/Beta 1000 . +Y 23178886 23181770 BSR/Beta 1000 . +Y 23220740 23223625 BSR/Beta 1000 . +Y 23234125 23235822 BSR/Beta 1000 . +Y 23236898 23248080 BSR/Beta 1000 . +Y 23248729 23248851 BSR/Beta 1000 . +Y 23899295 23899388 TAR1 1000 . +Y 23956449 23956628 TAR1 1000 . +Y 24247659 24247700 TAR1 1000 . +Y 24630999 24631040 TAR1 1000 . +Y 24953159 24975657 BSR/Beta 1000 . +Y 24980997 24991235 BSR/Beta 1000 . 
+Y 25022753 25039185 BSR/Beta 1000 . +Y 25040153 25042421 BSR/Beta 1000 . +Y 25048332 25059258 BSR/Beta 1000 . +Y 25060235 25064798 BSR/Beta 1000 . +Y 25099139 25121882 BSR/Beta 1000 . +Y 25122419 25160800 BSR/Beta 1000 . +Y 25182404 25192372 BSR/Beta 1000 . +Y 25217722 25219409 BSR/Beta 1000 . +Y 25493588 25495275 BSR/Beta 1000 . +Y 26148315 26148450 TAR1 1000 . +Y 26586905 26609405 BSR/Beta 1000 . +Y 26614745 26624983 BSR/Beta 1000 . +Y 26656502 26672934 BSR/Beta 1000 . +Y 26673902 26676170 BSR/Beta 1000 . +Y 26682081 26693007 BSR/Beta 1000 . +Y 26693984 26698547 BSR/Beta 1000 . +Y 26732883 26755623 BSR/Beta 1000 . +Y 26756160 26794538 BSR/Beta 1000 . +Y 26816148 26826116 BSR/Beta 1000 . +Y 26851466 26853153 BSR/Beta 1000 . +Y 27109247 27110934 BSR/Beta 1000 . +Y 27136281 27146249 BSR/Beta 1000 . +Y 27167859 27206241 BSR/Beta 1000 . +Y 27206778 27229502 BSR/Beta 1000 . +Y 27263848 27268411 BSR/Beta 1000 . +Y 27269388 27280315 BSR/Beta 1000 . +Y 27286226 27288494 BSR/Beta 1000 . +Y 27289462 27305895 BSR/Beta 1000 . +Y 27337415 27347656 BSR/Beta 1000 . +Y 27352996 27375497 BSR/Beta 1000 . +Y 27813984 27814119 TAR1 1000 . +Y 28555026 28555353 TAR1 1000 . +Y 28784129 28819695 Satellite_repeat 1000 . +Y 58819367 58917648 (CATTC)n 1000 . +Y 58971913 58997782 (CATTC)n 1000 . +Y 59361267 59362785 TAR1 1000 . 
diff --git a/assets/blacklists/hg19-blacklist.bed b/assets/blacklists/v1.0/hg19-blacklist.v1.bed old mode 100755 new mode 100644 similarity index 100% rename from assets/blacklists/hg19-blacklist.bed rename to assets/blacklists/v1.0/hg19-blacklist.v1.bed diff --git a/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed b/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed new file mode 100644 index 000000000..41007eb72 --- /dev/null +++ b/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed @@ -0,0 +1,3435 @@ +10 0 3135400 High Signal Region +10 3218900 3276600 Low Mappability +10 3576900 3627700 Low Mappability +10 4191100 4197600 Low Mappability +10 4613500 4615400 High Signal Region +10 4761300 4763900 High Signal Region +10 5080800 5096600 Low Mappability +10 5580100 5586600 Low Mappability +10 6281200 6286700 High Signal Region +10 6740200 6742100 High Signal Region +10 7396300 7429800 High Signal Region +10 7633600 7636600 Low Mappability +10 7889700 7897500 High Signal Region +10 8144900 8153000 High Signal Region +10 8264000 8269200 High Signal Region +10 8382400 8404400 High Signal Region +10 8599200 8606400 Low Mappability +10 10012200 10033400 High Signal Region +10 10566900 10593500 High Signal Region +10 11218400 11224800 Low Mappability +10 11351800 11406300 Low Mappability +10 11491200 11493100 High Signal Region +10 11612300 11642500 High Signal Region +10 11692500 11701300 Low Mappability +10 12266500 12273000 High Signal Region +10 12385800 12396000 High Signal Region +10 13401200 13403100 High Signal Region +10 14559900 14577100 High Signal Region +10 14646300 14664500 Low Mappability +10 14923800 14928300 High Signal Region +10 15047600 15083100 High Signal Region +10 15528600 15534200 High Signal Region +10 15567000 15641800 High Signal Region +10 16967500 16971600 High Signal Region +10 17499600 17501700 High Signal Region +10 18555500 18558100 High Signal Region +10 19427600 19429100 High Signal Region +10 19538800 19546100 Low Mappability +10 19772200 
19801600 High Signal Region +10 20458900 20460800 High Signal Region +10 21208600 21216600 Low Mappability +10 21278500 21313500 High Signal Region +10 21642200 21649600 Low Mappability +10 21727800 21736400 Low Mappability +10 22031300 22063500 High Signal Region +10 22127200 22164500 High Signal Region +10 22186700 22290500 High Signal Region +10 22369100 22472300 High Signal Region +10 22683100 22690600 Low Mappability +10 22935900 22941800 High Signal Region +10 24687500 24691700 Low Mappability +10 25091400 25106900 Low Mappability +10 25622900 25629400 Low Mappability +10 25968400 25973400 Low Mappability +10 26641500 26662800 Low Mappability +10 27403200 27407600 High Signal Region +10 27904000 27909500 High Signal Region +10 28908500 28940600 High Signal Region +10 29243900 29249600 High Signal Region +10 29924300 29930700 Low Mappability +10 29954000 29971900 High Signal Region +10 30553000 30577100 High Signal Region +10 31054900 31095900 Low Mappability +10 31406500 31411100 High Signal Region +10 31750000 31757100 Low Mappability +10 31878400 31885800 High Signal Region +10 31980100 32000400 Low Mappability +10 32039700 32045000 High Signal Region +10 32176100 32182400 High Signal Region +10 32499200 32529900 High Signal Region +10 32816400 32857200 High Signal Region +10 33315300 33319800 High Signal Region +10 33492300 33508900 High Signal Region +10 33886600 33901100 Low Mappability +10 34739400 34749100 Low Mappability +10 35669300 35725500 High Signal Region +10 36130200 36135500 High Signal Region +10 36160700 36166700 High Signal Region +10 36594500 36597500 Low Mappability +10 36942200 36948800 Low Mappability +10 37186500 37189300 High Signal Region +10 37799700 37821400 High Signal Region +10 37964600 37970100 High Signal Region +10 38590100 38606100 High Signal Region +10 38637900 38644200 High Signal Region +10 38729400 38782700 High Signal Region +10 38933500 38956500 High Signal Region +10 39126700 39129400 High Signal Region +10 39760700 
39764700 High Signal Region +10 41185700 41195800 High Signal Region +10 41840500 41859100 Low Mappability +10 43769400 43773800 High Signal Region +10 44206300 44254100 High Signal Region +10 45515000 45588000 Low Mappability +10 45624800 45628400 High Signal Region +10 46136500 46139300 High Signal Region +10 46468300 46472100 High Signal Region +10 46500500 46538800 High Signal Region +10 46789300 46812500 High Signal Region +10 46966700 47009000 High Signal Region +10 47048600 47074700 Low Mappability +10 47663600 47683500 High Signal Region +10 47743600 47758500 High Signal Region +10 47875400 47881600 High Signal Region +10 48032400 48058800 High Signal Region +10 48677400 48682800 High Signal Region +10 49823500 49842200 High Signal Region +10 50029200 50035300 High Signal Region +10 50109900 50115500 High Signal Region +10 50178500 50184800 High Signal Region +10 50253700 50296500 High Signal Region +10 50333400 50335300 High Signal Region +10 50524000 50553900 High Signal Region +10 51126200 51132900 High Signal Region +10 51436800 51448000 High Signal Region +10 51470300 51474900 High Signal Region +10 51882900 51888000 Low Mappability +10 52052600 52059000 Low Mappability +10 52089600 52148500 High Signal Region +10 52522600 52599800 High Signal Region +10 53073900 53081100 High Signal Region +10 53569600 53576000 Low Mappability +10 54216200 54222900 High Signal Region +10 54588800 54619900 Low Mappability +10 55080400 55090500 High Signal Region +10 55654500 55659600 High Signal Region +10 55715600 55751000 High Signal Region +10 55841700 55847900 High Signal Region +10 56250200 56293900 High Signal Region +10 56701000 56728000 High Signal Region +10 56894100 56897300 High Signal Region +10 57099200 57153200 High Signal Region +10 57239100 57245400 High Signal Region +10 57326900 57333900 High Signal Region +10 57434000 57456500 High Signal Region +10 57678600 57684900 High Signal Region +10 57862800 58240900 High Signal Region +10 58566200 58570900 
High Signal Region +10 59381400 59396800 Low Mappability +10 59850500 59922300 Low Mappability +10 60444900 60446800 High Signal Region +10 60546600 60553100 Low Mappability +10 61373100 61375000 High Signal Region +10 63103900 63111200 Low Mappability +10 63508800 63519000 High Signal Region +10 63833800 63835000 High Signal Region +10 64418600 64420000 High Signal Region +10 65166300 65172600 High Signal Region +10 65450400 65477700 High Signal Region +10 65638900 65670200 High Signal Region +10 65938900 65956300 Low Mappability +10 66422900 66431000 High Signal Region +10 66662400 66678300 High Signal Region +10 69030100 69065800 High Signal Region +10 70657500 70668500 High Signal Region +10 70785400 70798600 Low Mappability +10 71012700 71019200 Low Mappability +10 71111600 71114200 Low Mappability +10 71510600 71637800 High Signal Region +10 71691300 71698600 Low Mappability +10 72292400 72314300 High Signal Region +10 72359200 72360700 High Signal Region +10 72493500 72499200 High Signal Region +10 72590700 72591900 High Signal Region +10 72690900 72709500 High Signal Region +10 73378200 73380100 High Signal Region +10 73576400 73601900 High Signal Region +10 74433300 74439500 High Signal Region +10 74655700 74672200 High Signal Region +10 74715300 74746600 High Signal Region +10 74857500 74888000 High Signal Region +10 76835100 76852400 High Signal Region +10 77950600 77979500 Low Mappability +10 78008300 78028800 Low Mappability +10 78637000 78696000 High Signal Region +10 78731500 78735800 High Signal Region +10 78803500 78823100 Low Mappability +10 79207800 79259400 High Signal Region +10 79314000 79354000 Low Mappability +10 80102300 80116000 High Signal Region +10 80928600 80996300 Low Mappability +10 81167600 81199400 High Signal Region +10 81600900 81997900 High Signal Region +10 82517500 82538800 High Signal Region +10 82571100 82575200 High Signal Region +10 82939800 82956300 High Signal Region +10 83386600 83392400 Low Mappability +10 83670800 
83678100 Low Mappability +10 83768200 83792700 Low Mappability +10 84155900 84180800 Low Mappability +10 84436900 84473700 Low Mappability +10 84744500 84750100 Low Mappability +10 85413200 85419700 Low Mappability +10 85696600 85732800 High Signal Region +10 85840200 85872500 High Signal Region +10 86561700 86565700 High Signal Region +10 88628700 88658500 Low Mappability +10 88963900 88968200 Low Mappability +10 89398700 89400100 High Signal Region +10 89949700 89964500 High Signal Region +10 90249000 90255300 High Signal Region +10 90324500 90329800 Low Mappability +10 90471200 90474200 Low Mappability +10 91252200 91256900 High Signal Region +10 91928900 91944500 High Signal Region +10 92909200 92915800 High Signal Region +10 94362500 94369300 Low Mappability +10 94591500 94610000 High Signal Region +10 94871200 94873100 High Signal Region +10 96068700 96078800 High Signal Region +10 96157200 96162600 Low Mappability +10 96192400 96199800 Low Mappability +10 97320500 97329700 High Signal Region +10 97525500 97534200 Low Mappability +10 97755000 97761200 High Signal Region +10 97896600 97920300 High Signal Region +10 98337800 98343700 High Signal Region +10 98433100 98444100 High Signal Region +10 100310500 100395900 High Signal Region +10 102667700 102669600 High Signal Region +10 102859800 102861500 High Signal Region +10 103500200 103519100 High Signal Region +10 103547000 103548600 High Signal Region +10 103569600 103575200 High Signal Region +10 103600400 103684400 High Signal Region +10 103936700 103942500 High Signal Region +10 104380700 104382300 High Signal Region +10 104493600 104499800 High Signal Region +10 104539700 104562500 Low Mappability +10 104748100 104771500 High Signal Region +10 104819400 104862500 Low Mappability +10 104966900 105001700 Low Mappability +10 105177000 105181900 Low Mappability +10 105672500 105678000 Low Mappability +10 106166900 106235700 High Signal Region +10 106382800 106403000 High Signal Region +10 106427100 106453600 
High Signal Region +10 106529600 106535200 Low Mappability +10 107125500 107136900 Low Mappability +10 107551800 107560700 High Signal Region +10 107845300 107863900 High Signal Region +10 107978900 108006700 Low Mappability +10 109212600 109216800 High Signal Region +10 109315100 109322400 Low Mappability +10 109941600 109948000 High Signal Region +10 110104900 110111300 Low Mappability +10 110504500 110516000 High Signal Region +10 110667700 110700900 Low Mappability +10 111217500 111219000 High Signal Region +10 112013700 112021700 High Signal Region +10 112053500 112058400 Low Mappability +10 112540600 112542100 High Signal Region +10 112587000 112611100 High Signal Region +10 112682400 112722100 Low Mappability +10 113722600 113729800 Low Mappability +10 114167300 114174900 High Signal Region +10 114736400 114738300 High Signal Region +10 114860600 114866900 High Signal Region +10 115641300 115643100 High Signal Region +10 116606200 116613400 Low Mappability +10 116762000 116764200 High Signal Region +10 116878000 116879900 High Signal Region +10 117476200 117491000 High Signal Region +10 118014300 118033200 High Signal Region +10 118054000 118076600 High Signal Region +10 118199900 118279700 Low Mappability +10 118910200 118917100 High Signal Region +10 118937400 118953000 Low Mappability +10 119698800 119701600 Low Mappability +10 120974800 120977500 High Signal Region +10 121136000 121143400 Low Mappability +10 121164700 121169300 Low Mappability +10 121566100 121580200 High Signal Region +10 121707800 121713500 High Signal Region +10 121762300 121769400 High Signal Region +10 122141100 122166000 High Signal Region +10 122346900 122371300 Low Mappability +10 122632400 122638000 High Signal Region +10 122832900 122839300 High Signal Region +10 123792900 123797100 High Signal Region +10 124412900 124433300 High Signal Region +10 124576300 124583500 Low Mappability +10 124605700 124611000 Low Mappability +10 124680500 124686200 Low Mappability +10 124760500 
124788800 High Signal Region +10 125819500 125825700 High Signal Region +10 125869000 125871400 High Signal Region +10 126262200 126291600 Low Mappability +10 127779500 127797900 High Signal Region +10 129189500 129217200 High Signal Region +10 129388700 129419600 Low Mappability +10 129443000 129454800 High Signal Region +10 129734500 129736400 High Signal Region +10 129925300 129940600 Low Mappability +10 130039500 130052900 High Signal Region +10 130396900 130408000 High Signal Region +10 130542000 130694900 High Signal Region +11 0 3201000 High Signal Region +11 5167600 5182600 High Signal Region +11 5361500 5365400 Low Mappability +11 5552700 5558200 Low Mappability +11 6141300 6148700 Low Mappability +11 7489400 7492300 High Signal Region +11 7752300 7774500 Low Mappability +11 8058600 8083100 Low Mappability +11 8354900 8370700 High Signal Region +11 8907200 8936100 Low Mappability +11 9707900 9715100 Low Mappability +11 9807600 9814200 Low Mappability +11 10252000 10266800 High Signal Region +11 10760200 10770800 Low Mappability +11 11287200 11295100 High Signal Region +11 12129400 12163100 High Signal Region +11 12507200 12512700 Low Mappability +11 12561900 12569100 Low Mappability +11 12750500 12802700 High Signal Region +11 12856200 12863700 High Signal Region +11 12953900 12960700 Low Mappability +11 14896500 14922100 High Signal Region +11 15227600 15235000 Low Mappability +11 16022400 16029000 High Signal Region +11 16326500 16331700 High Signal Region +11 16418200 16419600 High Signal Region +11 16567100 16573100 High Signal Region +11 17401400 17407800 High Signal Region +11 18330900 18342700 High Signal Region +11 18773800 18780100 High Signal Region +11 19566100 19570600 Low Mappability +11 19788600 19809400 Low Mappability +11 20310000 20312000 High Signal Region +11 20377900 20380400 High Signal Region +11 22322000 22340700 Low Mappability +11 22395200 22432900 Low Mappability +11 22534700 22537000 Low Mappability +11 23218500 23258100 Low 
Mappability +11 23522600 23552900 High Signal Region +11 24527400 24529500 Low Mappability +11 25196800 25217300 High Signal Region +11 25796400 25802200 Low Mappability +11 26898500 26900500 High Signal Region +11 27525200 27541400 High Signal Region +11 28097200 28104500 Low Mappability +11 29064100 29129900 Low Mappability +11 29259900 29291300 High Signal Region +11 29586000 29592400 Low Mappability +11 30511100 30535400 High Signal Region +11 31343800 31345700 Low Mappability +11 33062300 33068800 Low Mappability +11 34541000 34683100 High Signal Region +11 37482400 37484900 High Signal Region +11 40230800 40248400 High Signal Region +11 40625500 40640300 Low Mappability +11 40796600 40860600 High Signal Region +11 40887700 40915600 High Signal Region +11 41631700 41633600 High Signal Region +11 43237300 43239300 Low Mappability +11 43286400 43329800 High Signal Region +11 43454800 43462300 Low Mappability +11 43659700 43682100 Low Mappability +11 45584200 45655700 Low Mappability +11 46412300 46415000 Low Mappability +11 46492800 46514400 Low Mappability +11 47847500 47860600 High Signal Region +11 48451800 48536100 High Signal Region +11 48929800 49060400 Low Mappability +11 50445100 50469600 High Signal Region +11 51437600 51456700 High Signal Region +11 51664900 51690400 Low Mappability +11 54135500 54141600 High Signal Region +11 54576500 54583300 Low Mappability +11 55240500 55248100 Low Mappability +11 56588500 56594500 High Signal Region +11 57301700 57303600 High Signal Region +11 60558900 60699000 Low Mappability +11 61407400 61427800 Low Mappability +11 61593700 61596500 Low Mappability +11 62879300 62901500 High Signal Region +11 63467600 63475000 Low Mappability +11 64568100 64574200 High Signal Region +11 64681700 64683600 Low Mappability +11 64791900 64827100 Low Mappability +11 65451700 65458800 Low Mappability +11 66629900 66634100 High Signal Region +11 66947700 66958600 Low Mappability +11 67866400 67872800 Low Mappability +11 70155800 
70162400 Low Mappability +11 71505700 71512100 Low Mappability +11 71875200 71881700 Low Mappability +11 73436900 73439100 Low Mappability +11 74128800 74136200 Low Mappability +11 74199900 74226800 Low Mappability +11 74301700 74319600 High Signal Region +11 74540000 74548400 Low Mappability +11 74884300 74899000 Low Mappability +11 76828100 76868600 Low Mappability +11 77255000 77257100 Low Mappability +11 79845100 79847300 Low Mappability +11 79872400 79877100 Low Mappability +11 79917300 79920800 Low Mappability +11 81545400 81552800 Low Mappability +11 82123300 82144400 High Signal Region +11 82333900 82338400 Low Mappability +11 83050300 83093600 High Signal Region +11 83126000 83172300 Low Mappability +11 85046500 85067800 High Signal Region +11 85285400 85292700 High Signal Region +11 88910900 88917600 Low Mappability +11 88965900 88971900 High Signal Region +11 89080800 89101300 High Signal Region +11 90504000 90510500 High Signal Region +11 90829400 90835000 Low Mappability +11 90901700 90908400 Low Mappability +11 90958500 91026800 Low Mappability +11 91047200 91049300 Low Mappability +11 92099000 92108200 High Signal Region +11 93409300 93428900 High Signal Region +11 94622900 94629900 Low Mappability +11 96065000 96093900 High Signal Region +11 98586900 98673900 Low Mappability +11 99712600 99717300 High Signal Region +11 100662800 100669700 Low Mappability +11 101731800 101741400 High Signal Region +11 102992300 103049900 Low Mappability +11 104239000 104242600 Low Mappability +11 106028100 106037400 High Signal Region +11 106254800 106297600 High Signal Region +11 106943500 106950100 Low Mappability +11 107188200 107200400 High Signal Region +11 107281300 107283200 High Signal Region +11 108377600 108404500 Low Mappability +11 108649800 108655400 Low Mappability +11 109010700 109024400 High Signal Region +11 109998500 110024600 Low Mappability +11 110421300 110423200 High Signal Region +11 111182400 111189800 Low Mappability +11 111215500 111234900 
Low Mappability +11 111353300 111360000 Low Mappability +11 111855400 111857100 High Signal Region +11 112010600 112016400 High Signal Region +11 114456300 114462800 Low Mappability +11 115014300 115046900 Low Mappability +11 115611200 115665700 High Signal Region +11 115754800 115766900 Low Mappability +11 116389300 116395200 Low Mappability +11 116742700 116792800 Low Mappability +11 117499800 117505100 Low Mappability +11 119299800 119340300 Low Mappability +11 120305300 120357300 Low Mappability +11 120515100 120644700 High Signal Region +11 121069800 121075100 High Signal Region +11 121203000 121207500 Low Mappability +11 121396100 121422700 Low Mappability +11 121611900 121614000 Low Mappability +11 121981400 122082500 High Signal Region +12 0 3070900 High Signal Region +12 3102800 3111000 High Signal Region +12 4110500 4112400 High Signal Region +12 4218500 4235300 High Signal Region +12 4751600 4790100 High Signal Region +12 5050300 5065400 High Signal Region +12 6514000 6525100 High Signal Region +12 6606500 6612600 High Signal Region +12 7447300 7449900 High Signal Region +12 7801900 7808600 High Signal Region +12 7925300 7939600 High Signal Region +12 8572000 8640600 High Signal Region +12 10693000 10704200 High Signal Region +12 10961300 11004600 High Signal Region +12 11187600 11194100 High Signal Region +12 11642900 11658000 High Signal Region +12 12092500 12097600 High Signal Region +12 14844600 14848200 High Signal Region +12 15026600 15032400 High Signal Region +12 15252700 15259600 High Signal Region +12 15866100 15871800 High Signal Region +12 16746900 16748800 High Signal Region +12 17116400 17129400 High Signal Region +12 17243500 17248500 High Signal Region +12 18340700 18354800 High Signal Region +12 18856500 18909700 High Signal Region +12 19312600 19413500 High Signal Region +12 19442600 19590100 High Signal Region +12 19627700 19633600 High Signal Region +12 19777500 19781600 High Signal Region +12 19879300 19901200 High Signal Region +12 
19931800 19948600 High Signal Region +12 20031900 20205100 High Signal Region +12 20225600 20298300 High Signal Region +12 21914300 21916000 Low Mappability +12 21972100 21987900 High Signal Region +12 22021600 22680500 Low Mappability +12 22896100 22902300 High Signal Region +12 23140700 23225200 High Signal Region +12 23283500 24030600 High Signal Region +12 24295300 24365100 Low Mappability +12 24692300 24727100 High Signal Region +12 25591800 25595300 Low Mappability +12 25840400 25842100 High Signal Region +12 27556800 27592000 High Signal Region +12 28491400 28494000 High Signal Region +12 28954800 28964000 High Signal Region +12 29379500 29400800 High Signal Region +12 30965100 31016300 High Signal Region +12 32020400 32032500 Low Mappability +12 32217700 32219200 High Signal Region +12 33388100 33410100 Low Mappability +12 33748900 33771800 High Signal Region +12 33869500 33880600 High Signal Region +12 34056800 34074100 High Signal Region +12 34128700 34139700 High Signal Region +12 34623000 34629000 Low Mappability +12 35783900 35814400 High Signal Region +12 36099400 36107200 High Signal Region +12 36679100 36700200 Low Mappability +12 36952200 36957900 High Signal Region +12 38746900 38749300 High Signal Region +12 41363500 41385500 High Signal Region +12 41502600 41516100 High Signal Region +12 41860000 41870200 High Signal Region +12 42124500 42126300 High Signal Region +12 42437900 42443400 High Signal Region +12 42666800 42690800 High Signal Region +12 43335600 43349300 High Signal Region +12 43659100 43675300 High Signal Region +12 43953900 43986900 High Signal Region +12 44064500 44070600 High Signal Region +12 44765600 44795900 Low Mappability +12 45768700 45773700 High Signal Region +12 45949200 45962200 High Signal Region +12 46707000 46709200 High Signal Region +12 47027300 47039300 High Signal Region +12 47280500 47286800 High Signal Region +12 47328600 47331300 High Signal Region +12 47646800 47648300 High Signal Region +12 47833000 47834900 
High Signal Region +12 47995600 47997600 High Signal Region +12 48842900 48849500 High Signal Region +12 49124800 49155700 High Signal Region +12 49245200 49272100 High Signal Region +12 49606200 49612000 High Signal Region +12 50784600 50789900 High Signal Region +12 51486000 51492000 High Signal Region +12 52157900 52176400 High Signal Region +12 52200400 52223200 High Signal Region +12 52579600 52581200 High Signal Region +12 52730000 52735400 Low Mappability +12 52906200 52952300 High Signal Region +12 54358500 54369200 High Signal Region +12 54705400 54743600 High Signal Region +12 55079600 55267300 Low Mappability +12 56104100 56110600 Low Mappability +12 56423700 56425000 High Signal Region +12 56747800 56752200 High Signal Region +12 56911000 56914000 High Signal Region +12 58294800 58339800 High Signal Region +12 58659000 58692900 High Signal Region +12 58858800 58867600 High Signal Region +12 59034800 59039300 Low Mappability +12 59112800 59124700 High Signal Region +12 59270000 59276700 High Signal Region +12 59297800 59323200 High Signal Region +12 59601000 59605800 High Signal Region +12 60069500 60084400 High Signal Region +12 60501200 60506200 High Signal Region +12 61044200 61045300 High Signal Region +12 61289100 61293700 High Signal Region +12 61892600 61896100 High Signal Region +12 61964500 61971300 High Signal Region +12 62035300 62090200 High Signal Region +12 62959800 62999500 High Signal Region +12 63041800 63048200 High Signal Region +12 63289500 63322400 High Signal Region +12 63728400 63745100 High Signal Region +12 63838200 63840100 High Signal Region +12 65260100 65292400 High Signal Region +12 65784500 65808300 High Signal Region +12 66103800 66127200 High Signal Region +12 67058200 67060800 High Signal Region +12 67433500 67459300 High Signal Region +12 67519200 67571500 High Signal Region +12 67828900 67836600 High Signal Region +12 68696500 68711800 High Signal Region +12 68745100 68750600 Low Mappability +12 69059900 69061300 High 
Signal Region +12 69653100 69657800 High Signal Region +12 70641800 70668400 Low Mappability +12 71077100 71093600 Low Mappability +12 71589600 71596000 High Signal Region +12 72203000 72209300 High Signal Region +12 72634700 72641300 High Signal Region +12 74620800 74642100 High Signal Region +12 74775800 74778200 High Signal Region +12 74803000 74805400 High Signal Region +12 74857200 74862700 High Signal Region +12 75241800 75248400 High Signal Region +12 77160700 77166000 High Signal Region +12 77383500 77411300 High Signal Region +12 77547200 77553900 High Signal Region +12 78260000 78373200 High Signal Region +12 78462400 78468500 High Signal Region +12 80417200 80449700 High Signal Region +12 80894500 80916600 High Signal Region +12 81550400 81555100 High Signal Region +12 81985400 82064000 Low Mappability +12 83093000 83094900 High Signal Region +12 85401000 85408600 High Signal Region +12 87585600 87771500 Low Mappability +12 87802800 88006400 High Signal Region +12 88119800 88169700 Low Mappability +12 88229600 88312400 High Signal Region +12 88493200 88516700 Low Mappability +12 91221400 91256000 High Signal Region +12 91439200 91475500 High Signal Region +12 92393800 92395800 Low Mappability +12 92839700 92892700 High Signal Region +12 93233800 93265600 High Signal Region +12 93564200 93590500 High Signal Region +12 93915400 93951600 High Signal Region +12 94268500 94273900 High Signal Region +12 94550200 94556100 High Signal Region +12 94694300 94713700 High Signal Region +12 95976100 96021400 High Signal Region +12 97038100 97062700 High Signal Region +12 97616600 97622400 High Signal Region +12 98173700 98176600 High Signal Region +12 99644200 99649400 High Signal Region +12 100490600 100492300 High Signal Region +12 100766900 100825300 High Signal Region +12 101427900 101453500 High Signal Region +12 101839700 101849500 High Signal Region +12 102892000 102893900 High Signal Region +12 103458100 103472900 High Signal Region +12 103776900 103813700 
High Signal Region +12 105300300 105307000 High Signal Region +12 105435200 105437100 High Signal Region +12 105523800 105525700 High Signal Region +12 105628200 105631400 High Signal Region +12 108078800 108084400 High Signal Region +12 109901900 109909200 Low Mappability +12 110011800 110013700 High Signal Region +12 111388200 111417100 High Signal Region +12 112542200 112548700 High Signal Region +12 112775700 112830900 Low Mappability +12 113423500 113461500 High Signal Region +12 114584600 114597100 High Signal Region +12 114941500 114943900 High Signal Region +12 115725800 115748700 High Signal Region +12 116796500 116853000 High Signal Region +12 118341100 118358400 High Signal Region +12 118794900 118797400 High Signal Region +12 119013600 119018100 High Signal Region +12 119554500 119598100 High Signal Region +12 119659100 119670900 High Signal Region +12 120023800 120129000 High Signal Region +13 0 3038200 High Signal Region +13 3350900 3378900 High Signal Region +13 3404500 3438200 High Signal Region +13 3901100 3903100 Low Mappability +13 4762900 4770300 High Signal Region +13 5171400 5178400 High Signal Region +13 7601300 7604100 High Signal Region +13 7806100 7810900 High Signal Region +13 7893500 7899700 High Signal Region +13 9828900 9855900 High Signal Region +13 10174800 10181100 Low Mappability +13 12684400 13073000 High Signal Region +13 13752100 13774000 High Signal Region +13 13859900 13907900 High Signal Region +13 13981000 13983000 High Signal Region +13 14690600 14777500 Low Mappability +13 18932700 18963600 Low Mappability +13 21753300 21847200 Low Mappability +13 23620800 23647900 Low Mappability +13 25006900 25051500 High Signal Region +13 26440600 26448200 High Signal Region +13 27164600 27169100 High Signal Region +13 27875800 27888500 High Signal Region +13 29880700 29886800 Low Mappability +13 32889400 32895200 High Signal Region +13 33280200 33319400 High Signal Region +13 33350500 33491800 High Signal Region +13 35687400 35695700 
High Signal Region +13 36794200 36797400 High Signal Region +13 37036700 37043900 High Signal Region +13 38633900 38659300 Low Mappability +13 42435800 42437700 High Signal Region +13 44868600 44870900 High Signal Region +13 46316600 46324000 High Signal Region +13 50633400 50741800 High Signal Region +13 53269000 53270900 High Signal Region +13 60675600 60682600 High Signal Region +13 62291600 62346800 Low Mappability +13 62409800 62426300 High Signal Region +13 63142500 63184600 High Signal Region +13 64878100 64885300 High Signal Region +13 65352900 66254300 Low Mappability +13 71381400 71387500 High Signal Region +13 74521500 74565200 High Signal Region +13 74684000 74712200 High Signal Region +13 76472300 76501300 High Signal Region +13 77304000 77305900 High Signal Region +13 77430600 77440000 High Signal Region +13 79563400 79570800 High Signal Region +13 80276300 80279400 High Signal Region +13 80489100 80491400 High Signal Region +13 83419000 83444300 High Signal Region +13 85125800 85145900 High Signal Region +13 86149500 86190600 High Signal Region +13 86502700 86511700 High Signal Region +13 88324900 88345400 High Signal Region +13 92599100 92625400 Low Mappability +13 93279200 93294800 High Signal Region +13 93650100 93651500 High Signal Region +13 93940300 93955300 High Signal Region +13 94016300 94020800 High Signal Region +13 97189600 97206100 High Signal Region +13 98418200 98420500 Low Mappability +13 99774000 99792100 High Signal Region +13 102381900 102387900 High Signal Region +13 105123500 105128600 Low Mappability +13 107839000 107860300 Low Mappability +13 110602100 110615800 High Signal Region +13 110729600 110745400 High Signal Region +13 111187700 111189500 High Signal Region +13 111499700 111515900 Low Mappability +13 112577200 112595200 High Signal Region +13 113171200 113173100 High Signal Region +13 113272600 113310700 High Signal Region +13 115498200 115504200 High Signal Region +13 115741300 115743200 Low Mappability +13 116191900 
116193900 High Signal Region +13 119188100 119230700 High Signal Region +13 119486800 119618500 High Signal Region +13 119660800 119674100 High Signal Region +13 119899200 120147600 Low Mappability +13 120320500 120421600 High Signal Region +14 0 4323000 High Signal Region +14 4372100 4741400 High Signal Region +14 4762800 5839200 High Signal Region +14 5959700 6479300 High Signal Region +14 6500100 6791800 High Signal Region +14 6993800 7734200 High Signal Region +14 7869900 7872200 High Signal Region +14 8005200 8018900 High Signal Region +14 8285700 8287800 High Signal Region +14 8652200 8658800 Low Mappability +14 10086500 10118400 High Signal Region +14 10178800 10198700 Low Mappability +14 11046200 11050200 High Signal Region +14 12536700 12538700 High Signal Region +14 14333600 14340200 High Signal Region +14 15460700 15467200 High Signal Region +14 16907800 16914000 High Signal Region +14 16937900 16941100 High Signal Region +14 18487900 18494100 High Signal Region +14 19251900 19255700 High Signal Region +14 19277200 19279100 High Signal Region +14 19414800 19633500 High Signal Region +14 21360400 21366100 High Signal Region +14 21878600 21884500 High Signal Region +14 22542900 22570000 High Signal Region +14 22902100 22934800 High Signal Region +14 25875200 26292200 High Signal Region +14 26946900 26948800 High Signal Region +14 29001300 29003200 Low Mappability +14 29343900 29345700 Low Mappability +14 30748800 30754700 High Signal Region +14 31919300 31923900 High Signal Region +14 32115300 32120500 Low Mappability +14 33667700 33670000 Low Mappability +14 33981000 33987500 Low Mappability +14 35275300 35281500 High Signal Region +14 35709400 35722200 High Signal Region +14 36429100 36440100 High Signal Region +14 37229100 37260800 Low Mappability +14 37619400 37635200 Low Mappability +14 38086800 38116800 High Signal Region +14 38280800 38283100 High Signal Region +14 38455100 38462200 Low Mappability +14 39580800 39607200 High Signal Region +14 
39731900 39737200 High Signal Region +14 39905500 39911100 High Signal Region +14 41053200 41061900 Low Mappability +14 41326900 43109000 High Signal Region +14 43132400 43668900 High Signal Region +14 43803900 43850200 High Signal Region +14 44149300 44152100 High Signal Region +14 44273800 44343500 High Signal Region +14 44514200 44516000 Low Mappability +14 45726200 45753500 High Signal Region +14 45811900 45813800 High Signal Region +14 46269900 46274300 High Signal Region +14 47609500 47630400 High Signal Region +14 50538900 50606000 High Signal Region +14 50626200 50638500 High Signal Region +14 51472000 51515400 High Signal Region +14 51730700 51768100 High Signal Region +14 51814200 51837200 High Signal Region +14 52821200 53035800 Low Mappability +14 53146700 53340000 High Signal Region +14 53475200 53479600 High Signal Region +14 53515600 53530500 Low Mappability +14 56447800 56455700 High Signal Region +14 56693100 56695000 High Signal Region +14 58052600 58059800 Low Mappability +14 58462700 58464600 Low Mappability +14 58657800 58659700 High Signal Region +14 58831400 58833300 High Signal Region +14 59250300 59270000 High Signal Region +14 59488900 59490800 High Signal Region +14 59980800 59995700 High Signal Region +14 60328300 60357300 High Signal Region +14 60960000 60961900 Low Mappability +14 61580500 61586700 High Signal Region +14 61855000 61856300 High Signal Region +14 62107300 62126200 High Signal Region +14 64290100 64292500 High Signal Region +14 64463300 64478500 Low Mappability +14 65128900 65135300 Low Mappability +14 66427000 66428400 High Signal Region +14 68232600 68278200 High Signal Region +14 69161000 69163400 High Signal Region +14 70974500 70975600 High Signal Region +14 71121300 71126700 High Signal Region +14 71449700 71453700 High Signal Region +14 71783600 71804000 High Signal Region +14 72900100 72921400 High Signal Region +14 73644600 73679900 High Signal Region +14 73847900 73861200 High Signal Region +14 74039300 74066900 
High Signal Region +14 74124400 74138500 High Signal Region +14 74435600 74447800 High Signal Region +14 75425300 75440500 High Signal Region +14 78162300 78168200 High Signal Region +14 78401700 78403200 High Signal Region +14 79145300 79196400 High Signal Region +14 80148100 80150800 High Signal Region +14 80422800 80439400 High Signal Region +14 80622600 80627700 High Signal Region +14 81333200 81337500 High Signal Region +14 81495300 81519300 High Signal Region +14 82077600 82084900 High Signal Region +14 82846900 82867200 High Signal Region +14 82958700 82964100 High Signal Region +14 83292900 83306500 High Signal Region +14 83507000 83512600 High Signal Region +14 84354700 84409800 High Signal Region +14 84855100 84881600 Low Mappability +14 85177800 85203300 Low Mappability +14 85521200 85535200 Low Mappability +14 86198000 86200000 High Signal Region +14 86590500 86614400 High Signal Region +14 87354600 87373000 High Signal Region +14 87671400 87677500 High Signal Region +14 87790500 87852200 High Signal Region +14 88450200 88453600 High Signal Region +14 88478400 88480300 High Signal Region +14 90018300 90019500 High Signal Region +14 90294700 90301800 High Signal Region +14 90910200 90912200 High Signal Region +14 91415900 91418400 High Signal Region +14 91510800 91514900 High Signal Region +14 91672700 91694800 High Signal Region +14 91951700 91976400 High Signal Region +14 92032500 92040900 High Signal Region +14 92383600 92389900 High Signal Region +14 92411600 92432900 High Signal Region +14 92792600 92798500 High Signal Region +14 92921100 92953200 High Signal Region +14 93017600 93020400 High Signal Region +14 93355600 93360200 High Signal Region +14 94319700 94327000 High Signal Region +14 95561600 95567600 High Signal Region +14 96048000 96054300 High Signal Region +14 96093600 96116100 High Signal Region +14 97323800 97326500 High Signal Region +14 98226800 98237000 High Signal Region +14 98731900 98757200 High Signal Region +14 99207100 99208200 
High Signal Region +14 99649700 99655500 High Signal Region +14 101076400 101098900 Low Mappability +14 101404800 101414800 High Signal Region +14 102548900 102565300 High Signal Region +14 102755800 102762600 High Signal Region +14 103300300 103302400 High Signal Region +14 103858600 103872900 High Signal Region +14 103999500 104025500 High Signal Region +14 104104800 104128100 Low Mappability +14 104704500 104716800 High Signal Region +14 105758200 105764900 Low Mappability +14 105911400 105978300 High Signal Region +14 106002700 106005700 Low Mappability +14 106301000 106352700 High Signal Region +14 106444800 106483100 Low Mappability +14 106722600 106728700 High Signal Region +14 106895300 106897000 Low Mappability +14 108115100 108174900 Low Mappability +14 108283900 108303500 High Signal Region +14 109675300 109681200 High Signal Region +14 109911500 109917800 High Signal Region +14 110057000 110108200 Low Mappability +14 110356200 110373800 High Signal Region +14 110492000 110495700 Low Mappability +14 110906100 110908200 High Signal Region +14 110992800 110994500 High Signal Region +14 111903200 111909800 High Signal Region +14 112074600 112092300 High Signal Region +14 112210500 112215800 High Signal Region +14 112285400 112291900 High Signal Region +14 112332800 112340000 Low Mappability +14 112517900 112519900 High Signal Region +14 112627800 112663100 Low Mappability +14 114505900 114512900 High Signal Region +14 114822000 114823900 Low Mappability +14 115109700 115117400 High Signal Region +14 115272500 115280200 High Signal Region +14 115379200 115385600 High Signal Region +14 115911100 115912900 High Signal Region +14 115958100 115965000 High Signal Region +14 116402700 116407700 High Signal Region +14 116817000 116822900 High Signal Region +14 117285800 117292800 High Signal Region +14 118144700 118168500 Low Mappability +14 119286000 119287900 High Signal Region +14 120180000 120202600 High Signal Region +14 120742600 120749700 High Signal Region 
+14 120777500 120802300 High Signal Region +14 121007000 121010900 Low Mappability +14 122502500 122534800 High Signal Region +14 123349400 123351300 Low Mappability +14 123412000 123452600 High Signal Region +14 123674600 123695600 High Signal Region +14 124334000 124340200 High Signal Region +14 124415600 124436400 High Signal Region +14 124491600 124497700 High Signal Region +14 124739500 124902200 High Signal Region +15 0 3125600 High Signal Region +15 3150900 3170400 High Signal Region +15 3313900 3336200 High Signal Region +15 3360500 3363700 High Signal Region +15 3538600 3551000 High Signal Region +15 3712200 3732700 High Signal Region +15 3793500 3823000 High Signal Region +15 4155900 4160900 High Signal Region +15 4278500 4284100 High Signal Region +15 4852000 4894600 Low Mappability +15 4980200 4987600 Low Mappability +15 5369000 5385500 High Signal Region +15 5681700 5690400 High Signal Region +15 5910000 5911700 High Signal Region +15 5993500 5995400 High Signal Region +15 6074100 6087100 Low Mappability +15 6192800 6200000 Low Mappability +15 6316000 6317900 High Signal Region +15 6510500 6539100 High Signal Region +15 6674800 6701400 High Signal Region +15 6801200 6808300 High Signal Region +15 7539900 7548600 Low Mappability +15 7800800 7803000 Low Mappability +15 7849400 7855600 High Signal Region +15 7904400 7929500 Low Mappability +15 8517500 8520400 High Signal Region +15 8548000 8576100 Low Mappability +15 8800200 8808700 High Signal Region +15 8985200 9054800 High Signal Region +15 9219000 9224900 Low Mappability +15 9293200 9333300 High Signal Region +15 9379300 9409100 High Signal Region +15 9437100 9443600 High Signal Region +15 9536500 9554100 High Signal Region +15 9992700 10045700 High Signal Region +15 10579600 10591500 Low Mappability +15 10753400 10810200 High Signal Region +15 10835200 10854700 Low Mappability +15 11921000 11933300 High Signal Region +15 12055800 12063200 Low Mappability +15 12526800 12531900 Low Mappability +15 
12872000 12873900 High Signal Region +15 12932300 12934200 Low Mappability +15 13919500 13948300 High Signal Region +15 14414600 14439100 Low Mappability +15 14722200 14732900 High Signal Region +15 14873900 14902400 High Signal Region +15 15043600 15059700 High Signal Region +15 15525500 15551900 High Signal Region +15 16168200 16186400 High Signal Region +15 16303700 16309500 High Signal Region +15 16716400 16717500 High Signal Region +15 16901300 16907100 High Signal Region +15 16939800 16955100 Low Mappability +15 17139000 17169100 High Signal Region +15 17562100 17581400 High Signal Region +15 18314600 18325000 High Signal Region +15 19038400 19063800 Low Mappability +15 19402600 19405500 High Signal Region +15 19448100 19453900 High Signal Region +15 19557200 19578000 High Signal Region +15 19626800 19631800 High Signal Region +15 19678400 19685800 High Signal Region +15 20063000 20067500 High Signal Region +15 20155100 20170700 Low Mappability +15 20474900 20510100 High Signal Region +15 20531400 20537100 High Signal Region +15 20821500 20826700 High Signal Region +15 20972700 20978300 Low Mappability +15 21114000 21115900 High Signal Region +15 21262100 21268500 Low Mappability +15 21423200 21487200 High Signal Region +15 21655500 21657500 High Signal Region +15 21815500 21820800 High Signal Region +15 21853700 21892400 High Signal Region +15 22268700 22293500 High Signal Region +15 22751400 22756700 Low Mappability +15 22799300 22809700 Low Mappability +15 23240200 23255600 Low Mappability +15 23465300 23467800 High Signal Region +15 23886000 23887900 Low Mappability +15 23926900 23939700 High Signal Region +15 24309300 24325700 Low Mappability +15 24761100 24766700 High Signal Region +15 24801600 24837300 High Signal Region +15 24880900 24898600 Low Mappability +15 25051400 25065200 Low Mappability +15 26112700 26118900 High Signal Region +15 26905000 26919300 Low Mappability +15 27286100 27326800 High Signal Region +15 27384100 27390300 Low Mappability 
+15 27638200 27640500 High Signal Region +15 28564400 28578800 High Signal Region +15 29285200 29291500 Low Mappability +15 29347600 29395600 High Signal Region +15 29463900 29470200 High Signal Region +15 29969800 30001400 High Signal Region +15 30117700 30126200 High Signal Region +15 30441400 30448200 Low Mappability +15 30747900 30755000 High Signal Region +15 30996700 31016300 High Signal Region +15 31066700 31083700 High Signal Region +15 32783900 32806700 High Signal Region +15 32832800 32880300 High Signal Region +15 33138700 33140800 Low Mappability +15 33308700 33310800 Low Mappability +15 33444200 33454100 High Signal Region +15 33710200 33745700 High Signal Region +15 33781400 33849400 High Signal Region +15 33869800 33884700 High Signal Region +15 34494500 34502100 Low Mappability +15 34763100 34769400 High Signal Region +15 34987600 34992800 High Signal Region +15 35013200 35015400 High Signal Region +15 35366800 35406000 High Signal Region +15 36715200 36737400 High Signal Region +15 36966700 36997400 Low Mappability +15 37072900 37150800 Low Mappability +15 38462300 38484300 Low Mappability +15 39172900 39178300 Low Mappability +15 39335600 39348800 Low Mappability +15 39496100 39499100 High Signal Region +15 39695600 39718600 Low Mappability +15 40049600 40056000 High Signal Region +15 40086800 40101400 High Signal Region +15 41531400 41533200 High Signal Region +15 41890400 41896900 Low Mappability +15 42354900 42361100 High Signal Region +15 42925300 42942800 High Signal Region +15 43287300 43346300 High Signal Region +15 44469100 44476400 High Signal Region +15 44649000 44659600 Low Mappability +15 44723200 44728200 Low Mappability +15 44769700 44796100 High Signal Region +15 45005100 45009300 High Signal Region +15 45194600 45197100 High Signal Region +15 45577500 45590900 High Signal Region +15 45635600 45650500 High Signal Region +15 45774400 45779700 High Signal Region +15 45890700 45932500 High Signal Region +15 46255700 46257800 Low 
Mappability +15 46355600 46368400 High Signal Region +15 46502200 46506800 Low Mappability +15 46562500 46566200 Low Mappability +15 47232800 47256000 High Signal Region +15 47356500 47363700 Low Mappability +15 47539000 47555300 High Signal Region +15 48666900 48671000 High Signal Region +15 49283300 49299700 High Signal Region +15 49322600 49327300 Low Mappability +15 50426100 50442800 High Signal Region +15 50557700 50642600 High Signal Region +15 51113200 51117800 High Signal Region +15 51531900 51533900 Low Mappability +15 52125800 52131200 High Signal Region +15 52329800 52353100 High Signal Region +15 53039200 53044200 Low Mappability +15 53831000 53834900 High Signal Region +15 53870700 53872700 High Signal Region +15 53918300 53929500 High Signal Region +15 54180700 54211500 Low Mappability +15 56032900 56038200 High Signal Region +15 56175800 56183100 Low Mappability +15 56363800 56367900 High Signal Region +15 56400500 56402200 High Signal Region +15 56941600 56993500 High Signal Region +15 57279500 57285000 High Signal Region +15 57412200 57433600 High Signal Region +15 57889500 57913700 Low Mappability +15 58437200 58441100 High Signal Region +15 59421400 59435400 Low Mappability +15 59850100 59875200 Low Mappability +15 60153100 60203900 High Signal Region +15 60592000 60594300 Low Mappability +15 60931800 60986500 High Signal Region +15 61148600 61150700 High Signal Region +15 61903100 61915500 High Signal Region +15 62367600 62370100 High Signal Region +15 62553200 62555200 High Signal Region +15 62686500 62693700 High Signal Region +15 63329400 63346600 Low Mappability +15 63626000 63627900 High Signal Region +15 63791700 63796000 High Signal Region +15 63837600 63922800 High Signal Region +15 64591700 64598200 Low Mappability +15 64673500 64681900 High Signal Region +15 65115600 65123500 Low Mappability +15 65598500 65604500 High Signal Region +15 65666600 65673800 High Signal Region +15 65714400 65753500 High Signal Region +15 66045100 66065700 
High Signal Region +15 66208300 66210200 High Signal Region +15 68136300 68137800 Low Mappability +15 68980000 68986500 High Signal Region +15 69122300 69164500 High Signal Region +15 69264900 69268800 High Signal Region +15 69390300 69409400 High Signal Region +15 69642000 69646000 High Signal Region +15 70083000 70088800 High Signal Region +15 70609300 70611100 High Signal Region +15 70896600 70914000 High Signal Region +15 71104600 71112200 High Signal Region +15 71206600 71237500 Low Mappability +15 73060200 73087900 Low Mappability +15 73373200 73378200 Low Mappability +15 73873000 73880400 Low Mappability +15 74360700 74368000 Low Mappability +15 74814300 74826700 Low Mappability +15 74992000 75104600 High Signal Region +15 75205600 75212800 Low Mappability +15 75298000 75299500 High Signal Region +15 75437000 75440500 High Signal Region +15 75523600 75529700 High Signal Region +15 76102000 76106500 High Signal Region +15 76559900 76577900 Low Mappability +15 76964600 76971400 Low Mappability +15 77336200 77439100 High Signal Region +15 77718300 77735600 Low Mappability +15 77895000 77934800 Low Mappability +15 79685000 79775700 Low Mappability +15 79869700 79892600 Low Mappability +15 79974400 79978400 Low Mappability +15 80232400 80267100 High Signal Region +15 81145400 81152000 Low Mappability +15 81492300 81523600 High Signal Region +15 82338000 82368000 Low Mappability +15 82590700 82608900 Low Mappability +15 82675500 82677200 High Signal Region +15 83172100 83202200 Low Mappability +15 84746600 84753000 Low Mappability +15 85176800 85196600 Low Mappability +15 85541200 85543100 High Signal Region +15 86193800 86196100 High Signal Region +15 86312100 86326400 Low Mappability +15 87293900 87301200 Low Mappability +15 87967000 87969000 High Signal Region +15 88779400 88783900 Low Mappability +15 88974800 88976800 High Signal Region +15 89597900 89621300 High Signal Region +15 89808500 89809700 High Signal Region +15 89943000 89982000 Low Mappability +15 
90636400 90643600 Low Mappability +15 91115900 91134800 Low Mappability +15 91419400 91422200 High Signal Region +15 91720600 91723200 Low Mappability +15 91905900 91911200 High Signal Region +15 92470100 92475100 Low Mappability +15 92613700 92618300 Low Mappability +15 92722600 92730100 Low Mappability +15 92796100 92820000 Low Mappability +15 93044100 93062000 High Signal Region +15 93467800 93469500 Low Mappability +15 93867100 93873600 High Signal Region +15 94088400 94124100 High Signal Region +15 94150500 94156800 High Signal Region +15 94373000 94379600 High Signal Region +15 95087600 95092100 High Signal Region +15 95306000 95312300 High Signal Region +15 95729500 95756400 High Signal Region +15 96551700 96559500 Low Mappability +15 96977900 96983600 Low Mappability +15 97082100 97084300 High Signal Region +15 97472900 97487400 Low Mappability +15 99168800 99171900 High Signal Region +15 99552100 99553900 Low Mappability +15 100331500 100339800 Low Mappability +15 100360000 100379700 Low Mappability +15 100541700 100617400 Low Mappability +15 101655700 101662100 High Signal Region +15 102596800 102603200 High Signal Region +15 103271900 103277100 High Signal Region +15 103406700 103418500 High Signal Region +15 103606700 103611400 High Signal Region +15 103814500 104043600 High Signal Region +16 0 3427800 High Signal Region +16 3450300 3519700 Low Mappability +16 4300400 4366800 Low Mappability +16 4585000 4591300 High Signal Region +16 5708200 5710200 High Signal Region +16 7460800 7463600 High Signal Region +16 7937100 7958400 Low Mappability +16 8256700 8286200 High Signal Region +16 9577100 9579600 Low Mappability +16 10631200 10633200 Low Mappability +16 10974100 11013900 High Signal Region +16 11134600 11145200 High Signal Region +16 11248000 11249900 Low Mappability +16 11679900 11687500 Low Mappability +16 12327300 12345900 Low Mappability +16 12417900 12423400 High Signal Region +16 12829200 12831000 High Signal Region +16 12976200 12981700 Low 
Mappability +16 13087700 13107000 Low Mappability +16 13903200 13925900 Low Mappability +16 14316200 14341200 Low Mappability +16 15502700 15510100 Low Mappability +16 15741400 15757700 Low Mappability +16 17199900 17236000 High Signal Region +16 17751400 17761300 High Signal Region +16 17910400 17955500 High Signal Region +16 18532200 18534200 High Signal Region +16 18957500 18979200 High Signal Region +16 19334200 19375100 High Signal Region +16 19581200 19602400 Low Mappability +16 19711900 19748700 High Signal Region +16 19928600 19946300 Low Mappability +16 22923300 22929100 High Signal Region +16 26419300 26421200 High Signal Region +16 26808500 26814800 High Signal Region +16 27071900 27087600 High Signal Region +16 27212200 27218300 High Signal Region +16 28170600 28197500 High Signal Region +16 30828600 30830500 High Signal Region +16 31223800 31234300 Low Mappability +16 31339100 31358900 High Signal Region +16 31818700 31825200 Low Mappability +16 32147700 32153500 Low Mappability +16 32489700 32520100 Low Mappability +16 32579100 32598800 Low Mappability +16 33847200 33852600 Low Mappability +16 34581100 34591200 Low Mappability +16 34742000 34744000 High Signal Region +16 35980600 35983300 High Signal Region +16 36764900 36770500 Low Mappability +16 38714200 38721600 Low Mappability +16 39563700 39568200 High Signal Region +16 41270700 41273100 High Signal Region +16 42657300 42661200 High Signal Region +16 42773100 42779900 High Signal Region +16 42931600 42950000 High Signal Region +16 43764000 43771600 Low Mappability +16 44040400 44063900 Low Mappability +16 44709800 44726400 Low Mappability +16 44920200 44950700 Low Mappability +16 45292600 45293900 High Signal Region +16 45352100 45354000 High Signal Region +16 46364600 46369100 High Signal Region +16 47099100 47147300 High Signal Region +16 47552300 47564100 Low Mappability +16 48579900 48581300 Low Mappability +16 49024900 49031400 Low Mappability +16 49148400 49150300 Low Mappability +16 
49447700 49489300 High Signal Region +16 50084900 50101400 Low Mappability +16 50909100 50926800 Low Mappability +16 51087100 51094300 Low Mappability +16 51945800 51980200 High Signal Region +16 53412000 53428900 High Signal Region +16 53571500 53595400 Low Mappability +16 54298300 54307600 Low Mappability +16 54861600 54869000 High Signal Region +16 54959000 54965200 High Signal Region +16 55647800 55681600 Low Mappability +16 56038100 56065100 Low Mappability +16 56988400 57008400 High Signal Region +16 57085500 57095800 High Signal Region +16 57390200 57392600 High Signal Region +16 57792800 57811700 Low Mappability +16 58310800 58343000 High Signal Region +16 58632300 58670400 Low Mappability +16 59121800 59129100 Low Mappability +16 59310100 59378100 High Signal Region +16 60921200 60970900 High Signal Region +16 61312500 61325200 Low Mappability +16 62564300 62599200 High Signal Region +16 62875900 62880400 Low Mappability +16 63114300 63151200 High Signal Region +16 63301300 63313600 High Signal Region +16 64384600 64425600 High Signal Region +16 65176900 65181400 Low Mappability +16 66229300 66247600 Low Mappability +16 67328200 67334700 High Signal Region +16 68272300 68274300 High Signal Region +16 70542300 70558300 Low Mappability +16 70633900 70639700 Low Mappability +16 70892400 70898400 High Signal Region +16 70976900 70982900 High Signal Region +16 71687000 71691500 Low Mappability +16 72019300 72023900 Low Mappability +16 72056200 72062100 High Signal Region +16 72724800 72730900 Low Mappability +16 73656700 73688600 High Signal Region +16 74771800 74781500 Low Mappability +16 76057000 76065000 Low Mappability +16 76487100 76519600 High Signal Region +16 76988700 76991600 High Signal Region +16 77116900 77121900 Low Mappability +16 78977100 79013600 High Signal Region +16 79368600 79376000 Low Mappability +16 79782000 79786700 High Signal Region +16 79943000 79948600 Low Mappability +16 80269400 80309700 Low Mappability +16 81071700 81079200 Low 
Mappability +16 81779900 81782000 High Signal Region +16 81859300 81865600 High Signal Region +16 82079700 82099600 High Signal Region +16 82237800 82243200 Low Mappability +16 82828200 82845600 High Signal Region +16 83077300 83081800 High Signal Region +16 83360600 83368000 Low Mappability +16 84260500 84283300 High Signal Region +16 84380600 84407600 High Signal Region +16 84440100 84446000 High Signal Region +16 85671600 85673000 High Signal Region +16 85713500 85720100 High Signal Region +16 86333000 86354300 High Signal Region +16 86539500 86570300 High Signal Region +16 86819800 86822100 High Signal Region +16 87055400 87060300 High Signal Region +16 87287400 87302500 Low Mappability +16 87372300 87391700 Low Mappability +16 88022900 88029900 High Signal Region +16 88790600 88797900 Low Mappability +16 88957900 88967800 High Signal Region +16 89145200 89196100 Low Mappability +16 89431800 89448400 Low Mappability +16 89636000 89642900 High Signal Region +16 89877500 89879700 High Signal Region +16 90056200 90072300 Low Mappability +16 90341200 90350100 Low Mappability +16 91533700 91551800 High Signal Region +16 92254500 92259400 Low Mappability +16 93581500 93622800 High Signal Region +16 93685800 93711200 High Signal Region +16 93785700 93790200 High Signal Region +16 93991400 93997900 High Signal Region +16 94258100 94282000 Low Mappability +16 95782000 95788900 High Signal Region +16 95991000 96010400 Low Mappability +16 97996400 98207700 High Signal Region +17 0 3039300 High Signal Region +17 3075400 3085400 High Signal Region +17 3378900 3380800 High Signal Region +17 5863900 5885100 High Signal Region +17 6219100 6717500 High Signal Region +17 6877300 7037900 High Signal Region +17 7302300 7430200 High Signal Region +17 7615300 7617200 High Signal Region +17 7950200 8052300 High Signal Region +17 11097900 11105100 High Signal Region +17 13018500 13469100 High Signal Region +17 13492200 13555800 High Signal Region +17 13584800 13656200 High Signal 
Region +17 14961200 15054300 Low Mappability +17 20859400 20865200 High Signal Region +17 23426600 23537000 High Signal Region +17 23730600 23732500 High Signal Region +17 24095300 24097300 High Signal Region +17 29101000 29109600 High Signal Region +17 31569500 31571400 High Signal Region +17 35367400 35480300 Low Mappability +17 36230300 36232500 High Signal Region +17 38498200 38500800 High Signal Region +17 39842000 39849700 High Signal Region +17 40422500 40427000 High Signal Region +17 50569500 50571400 High Signal Region +17 53034300 53056100 High Signal Region +17 53151500 53153500 High Signal Region +17 53807400 53820300 High Signal Region +17 54112300 54134200 High Signal Region +17 57368400 57399900 High Signal Region +17 62736600 62738500 High Signal Region +17 66798500 66800400 High Signal Region +17 67740400 67742500 High Signal Region +17 70962200 70964800 High Signal Region +17 82975900 82991600 High Signal Region +17 84458800 84464500 Low Mappability +17 85264100 85266000 High Signal Region +17 93017000 93047400 High Signal Region +17 93623500 93646700 High Signal Region +17 94886200 94987200 High Signal Region +18 0 3063700 High Signal Region +18 3085500 3142600 High Signal Region +18 3568100 3570100 Low Mappability +18 3619800 3652100 Low Mappability +18 3779700 3785600 High Signal Region +18 3815100 3819300 High Signal Region +18 3873200 3889000 High Signal Region +18 4194700 4199900 High Signal Region +18 4456700 4504600 High Signal Region +18 4658000 4664400 Low Mappability +18 4695200 4701800 Low Mappability +18 5499400 5502000 Low Mappability +18 5895900 5900400 Low Mappability +18 6043700 6046600 Low Mappability +18 6343100 6376400 Low Mappability +18 6663800 6669200 High Signal Region +18 6796200 6803600 Low Mappability +18 6853600 6868500 Low Mappability +18 7032800 7035500 High Signal Region +18 7527500 7534800 High Signal Region +18 7782300 7798400 High Signal Region +18 7998000 8018800 Low Mappability +18 8164900 8183000 High Signal 
Region +18 8243000 8271800 High Signal Region +18 8292000 8294000 Low Mappability +18 8721900 8747000 High Signal Region +18 9095200 9127300 High Signal Region +18 9248500 9269200 Low Mappability +18 9420000 9426100 High Signal Region +18 9890700 9915900 High Signal Region +18 11168900 11192100 High Signal Region +18 11247700 11293200 High Signal Region +18 11626000 11648000 Low Mappability +18 12945100 12956300 High Signal Region +18 13030000 13041900 High Signal Region +18 13161400 13180500 High Signal Region +18 13241200 13251100 Low Mappability +18 13296400 13300000 High Signal Region +18 13513200 13517200 High Signal Region +18 14732900 14739600 Low Mappability +18 15225500 15232800 High Signal Region +18 15366900 15382100 High Signal Region +18 15695100 15737600 High Signal Region +18 16283100 16288900 High Signal Region +18 16988600 17013600 Low Mappability +18 17116100 17119600 High Signal Region +18 17346100 17352400 High Signal Region +18 17425100 17480600 High Signal Region +18 17513300 17517900 High Signal Region +18 17541300 17559000 High Signal Region +18 17593300 17598500 High Signal Region +18 17938300 17951600 Low Mappability +18 18816600 18823800 High Signal Region +18 18916300 18917900 High Signal Region +18 18976900 18992400 High Signal Region +18 19240600 19289100 High Signal Region +18 19345800 19352600 Low Mappability +18 19430400 19448100 High Signal Region +18 19679600 19681600 Low Mappability +18 19812100 19836500 High Signal Region +18 20352500 20369800 High Signal Region +18 20896200 20910000 Low Mappability +18 21261800 21268900 Low Mappability +18 21528200 21541600 High Signal Region +18 21943200 21945200 Low Mappability +18 22297400 22304000 High Signal Region +18 23186200 23215300 High Signal Region +18 25045100 25047300 High Signal Region +18 25253000 25259500 High Signal Region +18 25905600 25928600 High Signal Region +18 26003000 26008100 Low Mappability +18 26829800 26837100 Low Mappability +18 26998200 27005600 Low Mappability 
+18 27062000 27068200 High Signal Region +18 28151300 28167300 High Signal Region +18 28441700 28446600 Low Mappability +18 28482900 28484900 High Signal Region +18 28814100 28816900 High Signal Region +18 28960100 28966000 Low Mappability +18 29014700 29022000 High Signal Region +18 29557800 29559800 High Signal Region +18 29713000 29719200 High Signal Region +18 31281100 31294300 High Signal Region +18 32758400 32793400 High Signal Region +18 33212800 33221500 Low Mappability +18 33275100 33331000 High Signal Region +18 33697400 33722600 Low Mappability +18 34083600 34087300 Low Mappability +18 34397100 34409800 Low Mappability +18 35318500 35320400 Low Mappability +18 36454200 36494600 Low Mappability +18 36981500 36988700 Low Mappability +18 37031800 37045800 High Signal Region +18 37364600 37398900 Low Mappability +18 37545500 37645000 High Signal Region +18 39598600 39604900 High Signal Region +18 40306300 40309300 High Signal Region +18 40708500 40713600 Low Mappability +18 41381600 41387500 High Signal Region +18 41465300 41471500 High Signal Region +18 41820100 41826100 High Signal Region +18 41960600 41966100 High Signal Region +18 42556800 42559800 High Signal Region +18 42913000 42914900 High Signal Region +18 43335500 43337900 High Signal Region +18 43889500 43900400 High Signal Region +18 44033600 44050200 High Signal Region +18 44228000 44263100 High Signal Region +18 44291600 44295600 High Signal Region +18 44361600 44380500 High Signal Region +18 44873100 44875100 Low Mappability +18 44981000 45032700 High Signal Region +18 45131400 45133400 High Signal Region +18 45291700 45314300 Low Mappability +18 45357300 45364700 Low Mappability +18 45392200 45397700 High Signal Region +18 45506800 45513400 High Signal Region +18 45998300 46038000 Low Mappability +18 46082000 46101400 High Signal Region +18 46439100 46444100 Low Mappability +18 46791400 46793400 Low Mappability +18 47648600 47654100 Low Mappability +18 47769900 47783100 Low Mappability +18 
48009500 48011400 High Signal Region +18 48208100 48220300 High Signal Region +18 48705800 48713100 Low Mappability +18 48831300 48836100 High Signal Region +18 49387700 49397800 High Signal Region +18 49669200 49695600 High Signal Region +18 50253400 50268700 High Signal Region +18 50632100 50700200 Low Mappability +18 51072000 51077600 Low Mappability +18 51658600 51698300 High Signal Region +18 52020200 52059300 High Signal Region +18 52256200 52262200 High Signal Region +18 52378900 52395000 Low Mappability +18 52876200 52883200 High Signal Region +18 53828800 53839900 Low Mappability +18 53869300 53876600 Low Mappability +18 54023900 54030000 High Signal Region +18 54288100 54335900 Low Mappability +18 54698000 54707800 High Signal Region +18 55222400 55224400 Low Mappability +18 55311000 55321100 Low Mappability +18 55414800 55436200 Low Mappability +18 55899800 55901700 High Signal Region +18 55938500 55954100 High Signal Region +18 56273000 56276900 High Signal Region +18 56302600 56304500 High Signal Region +18 56341200 56346000 High Signal Region +18 56826900 56830200 Low Mappability +18 57560400 57562500 Low Mappability +18 58992700 58999300 Low Mappability +18 59496300 59511000 High Signal Region +18 59929900 59955000 High Signal Region +18 60042400 60044400 Low Mappability +18 60206100 60238100 High Signal Region +18 60525200 60533800 Low Mappability +18 62237400 62247700 High Signal Region +18 62273700 62292800 Low Mappability +18 62752700 62755100 High Signal Region +18 64131300 64132600 High Signal Region +18 64448400 64454900 Low Mappability +18 65103100 65105000 High Signal Region +18 65385700 65405100 Low Mappability +18 65492400 65494700 Low Mappability +18 65716300 65719400 Low Mappability +18 66543200 66548900 High Signal Region +18 66750000 66759900 Low Mappability +18 66881200 66887200 High Signal Region +18 68381300 68387800 High Signal Region +18 68412100 68425800 Low Mappability +18 68461300 68489000 High Signal Region +18 68691100 
68693200 High Signal Region +18 69759300 69761300 Low Mappability +18 70489500 70515400 High Signal Region +18 70775600 70791900 High Signal Region +18 70842100 70849200 Low Mappability +18 71032500 71038800 High Signal Region +18 71139200 71145200 High Signal Region +18 71208200 71211300 Low Mappability +18 71267000 71273300 Low Mappability +18 71630400 71641100 Low Mappability +18 72753900 72794900 High Signal Region +18 72987900 72991000 High Signal Region +18 73259600 73264100 Low Mappability +18 74553100 74566400 High Signal Region +18 74745500 74758500 Low Mappability +18 74880300 74882000 High Signal Region +18 76177900 76184300 Low Mappability +18 76579700 76586300 Low Mappability +18 77264400 77271000 High Signal Region +18 78197300 78199300 High Signal Region +18 78407800 78428500 Low Mappability +18 78861400 78867900 High Signal Region +18 80021700 80028900 Low Mappability +18 80307500 80309600 Low Mappability +18 80455500 80518400 Low Mappability +18 81299700 81306200 Low Mappability +18 82052100 82058200 High Signal Region +18 82160100 82227800 High Signal Region +18 82319500 82339900 High Signal Region +18 82692900 82717900 Low Mappability +18 83171100 83178400 Low Mappability +18 83700500 83707900 Low Mappability +18 84828700 84833000 High Signal Region +18 85035000 85080600 High Signal Region +18 85105800 85112200 High Signal Region +18 85169900 85175900 High Signal Region +18 85377800 85382800 Low Mappability +18 85697000 85699200 High Signal Region +18 85783600 85789900 High Signal Region +18 86508300 86510200 High Signal Region +18 86560600 86586100 High Signal Region +18 86828500 86849500 High Signal Region +18 87006300 87009800 High Signal Region +18 87141500 87161200 High Signal Region +18 87568300 87574300 High Signal Region +18 88149300 88155400 High Signal Region +18 89030400 89036400 High Signal Region +18 89615900 89650500 Low Mappability +18 89983200 89989700 Low Mappability +18 90055500 90092500 High Signal Region +18 90113400 90125400 
Low Mappability +18 90464100 90501300 High Signal Region +18 90601200 90702600 High Signal Region +19 0 3140800 High Signal Region +19 3161400 3248600 High Signal Region +19 4061100 4066400 Low Mappability +19 6581000 6594300 High Signal Region +19 7713600 7774800 High Signal Region +19 7810700 7843900 Low Mappability +19 8203200 8285500 Low Mappability +19 9250500 9357700 High Signal Region +19 9502000 9565000 Low Mappability +19 9745800 9803300 High Signal Region +19 9823500 9837700 High Signal Region +19 10507900 10510300 High Signal Region +19 10954500 10960300 Low Mappability +19 11199700 11239800 High Signal Region +19 12447200 12454600 Low Mappability +19 13203500 13216400 High Signal Region +19 13330600 13357100 High Signal Region +19 13685000 13693300 High Signal Region +19 13760500 13777200 High Signal Region +19 15256700 15263000 High Signal Region +19 15433400 15438100 High Signal Region +19 15711800 15719800 High Signal Region +19 15839200 15846600 High Signal Region +19 15956500 15958500 Low Mappability +19 16670500 16673100 High Signal Region +19 18358000 18364200 High Signal Region +19 18532700 18535600 High Signal Region +19 19132200 19161200 High Signal Region +19 19509000 19514900 High Signal Region +19 19870300 19876900 Low Mappability +19 20080700 20081800 High Signal Region +19 20140700 20144100 Low Mappability +19 20288200 20297900 Low Mappability +19 20455400 20462700 Low Mappability +19 20839700 20843900 Low Mappability +19 21218200 21243800 High Signal Region +19 21532400 21534400 Low Mappability +19 22644100 22651700 High Signal Region +19 22722400 22728400 Low Mappability +19 23356500 23358400 High Signal Region +19 23739200 23754000 High Signal Region +19 24040300 24042300 Low Mappability +19 24911900 24919200 High Signal Region +19 25741800 25770100 High Signal Region +19 25917500 25920000 High Signal Region +19 27751400 27758100 High Signal Region +19 28149600 28156600 High Signal Region +19 30907400 30908700 High Signal Region +19 
30963600 30968000 Low Mappability +19 31722800 31735800 High Signal Region +19 32203200 32211600 Low Mappability +19 32441800 32449100 Low Mappability +19 32822000 32824000 Low Mappability +19 33439100 33446100 Low Mappability +19 33864200 33877900 High Signal Region +19 33949100 33958200 High Signal Region +19 34131200 34161200 Low Mappability +19 34581900 34613000 High Signal Region +19 35076400 35079800 High Signal Region +19 35650200 35673500 High Signal Region +19 36702500 36723400 High Signal Region +19 37298800 37301800 Low Mappability +19 37617300 37624600 Low Mappability +19 38490200 38495300 Low Mappability +19 39078100 39079500 High Signal Region +19 39106700 39156300 High Signal Region +19 39244700 39270400 High Signal Region +19 39331700 39424100 High Signal Region +19 39599900 39607200 Low Mappability +19 39658700 39695100 Low Mappability +19 40020400 40026800 Low Mappability +19 40094100 40153300 High Signal Region +19 40328500 40330000 Low Mappability +19 41142700 41150000 Low Mappability +19 41424200 41473100 Low Mappability +19 42346000 42350500 Low Mappability +19 42647600 42649700 Low Mappability +19 43118800 43124600 High Signal Region +19 43236000 43238000 Low Mappability +19 43321500 43323700 High Signal Region +19 44145700 44171700 Low Mappability +19 44218500 44225000 Low Mappability +19 44862100 44864300 High Signal Region +19 45004900 45096500 Low Mappability +19 45182300 45190200 High Signal Region +19 45649000 45661500 High Signal Region +19 45699400 45706300 Low Mappability +19 47590300 47602700 Low Mappability +19 48484600 48496700 High Signal Region +19 48743800 48746300 High Signal Region +19 50107900 50114400 Low Mappability +19 50309700 50311600 High Signal Region +19 50754100 50755900 Low Mappability +19 50828900 50835600 High Signal Region +19 51649700 51655800 High Signal Region +19 51949000 51955700 Low Mappability +19 52303100 52309700 Low Mappability +19 52927900 52932300 Low Mappability +19 52967800 52991100 Low Mappability 
+19 53522200 53527100 High Signal Region +19 53767900 53777800 High Signal Region +19 54235200 54236600 High Signal Region +19 54884700 54936800 High Signal Region +19 54994900 55001700 Low Mappability +19 55976700 55984000 Low Mappability +19 56248700 56259000 Low Mappability +19 56846600 56849100 High Signal Region +19 57514200 57520700 Low Mappability +19 57634000 57635600 Low Mappability +19 57827000 57832700 Low Mappability +19 58012500 58014600 Low Mappability +19 58112400 58114500 High Signal Region +19 58481300 58483200 High Signal Region +19 59221800 59240400 High Signal Region +19 59763100 59779900 High Signal Region +19 60082500 60089900 High Signal Region +19 60906900 60934000 High Signal Region +19 61162600 61174300 Low Mappability +19 61197700 61268100 High Signal Region +19 61330300 61431500 High Signal Region +1 8628600 8719100 High Signal Region +1 12038300 12041400 High Signal Region +1 14958600 14992600 High Signal Region +1 17466800 17479900 High Signal Region +1 18872500 18901300 High Signal Region +1 19175300 19177200 High Signal Region +1 22555000 22556900 High Signal Region +1 24610600 24617100 High Signal Region +1 24683100 24685100 High Signal Region +1 26685100 26689200 High Signal Region +1 43776800 43779800 High Signal Region +1 44198000 44202200 High Signal Region +1 46701700 46756600 High Signal Region +1 48880600 48882500 High Signal Region +1 56119600 56143500 High Signal Region +1 56772200 56783300 High Signal Region +1 58613000 58614900 High Signal Region +1 63629100 63631600 High Signal Region +1 69455800 69457800 High Signal Region +1 71078400 71085500 High Signal Region +1 71250600 71256700 High Signal Region +1 73549100 73555300 High Signal Region +1 73832600 73902400 High Signal Region +1 78572900 78575400 High Signal Region +1 84953500 85663200 High Signal Region +1 88209400 88311700 High Signal Region +1 94093800 94109400 High Signal Region +1 95451000 95452900 High Signal Region +1 95783900 95789700 High Signal Region +1 
95810200 95851700 High Signal Region +1 100737900 100760500 High Signal Region +1 101040100 101046300 High Signal Region +1 102627300 102644300 High Signal Region +1 105226800 105230700 High Signal Region +1 110170400 110188300 High Signal Region +1 113602700 113604800 High Signal Region +1 114557300 114579100 High Signal Region +1 114643300 114660500 High Signal Region +1 115447500 115482800 High Signal Region +1 122356200 122358200 High Signal Region +1 133593600 133611300 High Signal Region +1 142651800 142672300 High Signal Region +1 145444500 145449100 High Signal Region +1 146120600 146128200 High Signal Region +1 151181600 151212000 High Signal Region +1 165862800 165864700 Low Mappability +1 171033000 171112400 High Signal Region +1 172716800 172738200 High Signal Region +1 172878700 172885100 High Signal Region +1 178538700 178540700 High Signal Region +1 181742100 181752400 High Signal Region +1 182628900 182630800 High Signal Region +1 183298200 183300500 High Signal Region +1 190299400 190304600 High Signal Region +1 192453100 192471800 High Signal Region +1 193226900 193228800 High Signal Region +1 195239800 195257400 High Signal Region +1 195278100 195280200 High Signal Region +1 195320700 195471900 High Signal Region +2 0 3086300 High Signal Region +2 3474900 3488800 High Signal Region +2 3932700 3939100 Low Mappability +2 3963500 3986100 High Signal Region +2 4515100 4518600 High Signal Region +2 4600600 4620300 High Signal Region +2 5378100 5394600 High Signal Region +2 5545900 5561600 High Signal Region +2 6078200 6095300 High Signal Region +2 6773100 6777500 Low Mappability +2 6832200 6846700 High Signal Region +2 7137500 7139600 High Signal Region +2 7404000 7458100 High Signal Region +2 7571700 7609800 High Signal Region +2 7656300 7669700 Low Mappability +2 7752800 7758500 High Signal Region +2 8034600 8042900 High Signal Region +2 8266200 8275600 High Signal Region +2 8528400 8535700 High Signal Region +2 8938000 8940500 High Signal Region +2 
9212600 9219300 High Signal Region +2 10177100 10183400 Low Mappability +2 10483200 10501500 Low Mappability +2 10677000 10697600 Low Mappability +2 12605500 12668600 High Signal Region +2 13824000 13869200 High Signal Region +2 13946300 13948900 High Signal Region +2 14014100 14035300 High Signal Region +2 14359100 14386600 High Signal Region +2 14919000 14924500 High Signal Region +2 15301300 15334700 High Signal Region +2 15430100 15435500 Low Mappability +2 15575900 15602800 High Signal Region +2 15716700 15721100 High Signal Region +2 15768300 15770500 High Signal Region +2 16192400 16198500 High Signal Region +2 16320200 16326500 Low Mappability +2 16762800 16787000 High Signal Region +2 17383200 17385100 High Signal Region +2 17612500 17654500 Low Mappability +2 17747200 17753000 High Signal Region +2 19209900 19212900 High Signal Region +2 19498400 19510300 High Signal Region +2 19707900 19712200 High Signal Region +2 20038500 20067400 Low Mappability +2 20426800 20433300 Low Mappability +2 20898900 20901100 High Signal Region +2 21062600 21082200 Low Mappability +2 22049700 22087700 High Signal Region +2 22137300 22165500 High Signal Region +2 22389900 22608700 High Signal Region +2 22737300 22745800 High Signal Region +2 23009600 23015000 Low Mappability +2 23274600 23304900 High Signal Region +2 23693700 23707900 High Signal Region +2 24193300 24199000 High Signal Region +2 26333100 26351900 Low Mappability +2 26759100 26763600 High Signal Region +2 26998200 27004400 Low Mappability +2 28183200 28205000 High Signal Region +2 30204600 30239600 Low Mappability +2 32381300 32488200 Low Mappability +2 33933000 33935300 High Signal Region +2 34049900 34051800 High Signal Region +2 34903900 34935900 Low Mappability +2 35090800 35109900 High Signal Region +2 35505000 35526700 Low Mappability +2 36008600 36019300 Low Mappability +2 36401900 36413100 High Signal Region +2 36508600 36515200 High Signal Region +2 36542800 36549100 High Signal Region +2 36761000 
36766500 High Signal Region +2 36951900 36970700 High Signal Region +2 37156900 37185900 High Signal Region +2 37339700 37359400 Low Mappability +2 38564700 38566600 Low Mappability +2 39225400 39293200 High Signal Region +2 39360600 39367900 Low Mappability +2 39517800 39534800 High Signal Region +2 39778500 39785700 Low Mappability +2 39887500 39915800 High Signal Region +2 40131200 40240800 High Signal Region +2 40262500 40268600 High Signal Region +2 40766400 40794000 High Signal Region +2 41059500 41070200 Low Mappability +2 41168700 41171400 High Signal Region +2 41692800 41694800 High Signal Region +2 41744300 41751600 Low Mappability +2 41775100 41781500 High Signal Region +2 41895300 41897200 High Signal Region +2 42044500 42051600 High Signal Region +2 42200300 42240700 High Signal Region +2 42950100 42956600 High Signal Region +2 43347900 43356400 High Signal Region +2 44936600 44942400 High Signal Region +2 46224800 46226700 High Signal Region +2 46343100 46348100 Low Mappability +2 46574200 46579600 Low Mappability +2 47008600 47023500 High Signal Region +2 47196300 47199300 High Signal Region +2 47533600 47642600 High Signal Region +2 47942200 47943800 High Signal Region +2 48483000 48491000 Low Mappability +2 50543200 50545500 High Signal Region +2 50679600 50686800 Low Mappability +2 51552600 51555600 High Signal Region +2 51750900 51756000 High Signal Region +2 51881600 51890600 Low Mappability +2 51945900 51948400 High Signal Region +2 52695900 52718600 High Signal Region +2 52786800 52796300 High Signal Region +2 53317700 53321600 Low Mappability +2 53347800 53367000 High Signal Region +2 53633400 53642900 High Signal Region +2 53745700 53799800 High Signal Region +2 54252600 54258500 High Signal Region +2 54698000 54747900 High Signal Region +2 54862600 54895300 High Signal Region +2 55197500 55216400 High Signal Region +2 55308300 55353700 High Signal Region +2 55823800 55829000 High Signal Region +2 55860200 55874300 Low Mappability +2 
55942000 55947800 High Signal Region +2 56192800 56194600 High Signal Region +2 56298700 56304900 High Signal Region +2 56465200 56471900 High Signal Region +2 56834300 56879100 High Signal Region +2 56988500 56990600 Low Mappability +2 57166400 57172900 Low Mappability +2 57214400 57223500 Low Mappability +2 57417400 57446500 High Signal Region +2 57628500 57633800 High Signal Region +2 57726600 57728500 High Signal Region +2 58212900 58263100 High Signal Region +2 58648300 58691900 High Signal Region +2 58881200 58902500 High Signal Region +2 59971300 59972800 Low Mappability +2 61038200 61042700 High Signal Region +2 61959600 61965300 High Signal Region +2 62022900 62040100 High Signal Region +2 62861100 62867200 High Signal Region +2 63297300 63302700 Low Mappability +2 63368100 63403900 High Signal Region +2 63462300 63483800 High Signal Region +2 63641200 63654600 High Signal Region +2 63718200 63725400 High Signal Region +2 63838100 63845300 Low Mappability +2 64309200 64319600 High Signal Region +2 64608400 64633400 Low Mappability +2 64698700 64703300 High Signal Region +2 65592500 65602200 High Signal Region +2 65737700 65781500 Low Mappability +2 66721600 66750400 High Signal Region +2 66845100 66852300 High Signal Region +2 67408400 67414500 High Signal Region +2 67939700 67946000 High Signal Region +2 68770400 68776700 High Signal Region +2 68917800 68924100 Low Mappability +2 69353900 69356600 High Signal Region +2 70263100 70270000 Low Mappability +2 70880100 70892900 High Signal Region +2 71054700 71071300 Low Mappability +2 71942000 71949500 Low Mappability +2 72270200 72275700 Low Mappability +2 73867000 73868900 High Signal Region +2 74364300 74402600 Low Mappability +2 74437600 74444900 Low Mappability +2 75499500 75504600 High Signal Region +2 77224000 77230500 Low Mappability +2 78318000 78339500 High Signal Region +2 79437700 79441900 High Signal Region +2 79936500 79943700 High Signal Region +2 80119000 80121500 High Signal Region +2 
80220600 80257700 Low Mappability +2 80795600 80838700 High Signal Region +2 80879000 80880200 High Signal Region +2 80956500 81006000 High Signal Region +2 81069000 81075100 High Signal Region +2 81639400 81644800 High Signal Region +2 81750800 81756800 High Signal Region +2 81790000 81795900 High Signal Region +2 82329800 82340100 High Signal Region +2 82673800 82679900 High Signal Region +2 82714300 82728500 High Signal Region +2 82783900 82789500 High Signal Region +2 82868800 82887900 High Signal Region +2 82916300 82936800 High Signal Region +2 83120100 83146100 High Signal Region +2 83185100 83193200 High Signal Region +2 83325900 83328200 High Signal Region +2 83413500 83587500 High Signal Region +2 83865600 83893100 High Signal Region +2 83931600 83995800 Low Mappability +2 84080900 84085600 High Signal Region +2 84505000 84510500 Low Mappability +2 84532500 84534600 Low Mappability +2 84564800 84576000 Low Mappability +2 85685600 85701800 Low Mappability +2 85874000 85896300 High Signal Region +2 86018200 86021700 Low Mappability +2 86303400 86317700 High Signal Region +2 86339600 86346900 Low Mappability +2 86612700 86617500 High Signal Region +2 87381000 87382800 High Signal Region +2 87875700 87941300 High Signal Region +2 88167400 88212600 High Signal Region +2 88776200 88780800 High Signal Region +2 89206600 89277100 Low Mappability +2 89345700 89350400 High Signal Region +2 89761200 89775100 High Signal Region +2 89856400 89920100 High Signal Region +2 90127200 90132700 High Signal Region +2 90157100 90249100 High Signal Region +2 90273200 90279100 High Signal Region +2 90309300 90396100 High Signal Region +2 92092600 92094700 High Signal Region +2 92167200 92169100 High Signal Region +2 93824700 93850200 High Signal Region +2 94602800 94607800 Low Mappability +2 94633900 94656500 High Signal Region +2 94801000 94809400 Low Mappability +2 94852800 94891200 High Signal Region +2 95064700 95093500 Low Mappability +2 95148000 95167800 High Signal 
Region +2 95215900 95320600 High Signal Region +2 95414700 95420600 High Signal Region +2 95536400 95538400 Low Mappability +2 95647900 95654300 High Signal Region +2 95794500 95799200 High Signal Region +2 95929300 95934400 High Signal Region +2 96191400 96208900 High Signal Region +2 96547800 96566800 Low Mappability +2 96954700 96977300 High Signal Region +2 97021000 97034600 High Signal Region +2 97308000 97327600 High Signal Region +2 97671600 97686300 High Signal Region +2 97760700 97765800 High Signal Region +2 97872400 97958200 High Signal Region +2 98361700 98449600 High Signal Region +2 98659400 98668200 High Signal Region +2 98796500 98801900 High Signal Region +2 99020000 99057500 High Signal Region +2 99300200 99320300 High Signal Region +2 99944600 99970200 High Signal Region +2 100112000 100114300 High Signal Region +2 100223900 100238300 High Signal Region +2 100418400 100777900 Low Mappability +2 101127200 101153600 Low Mappability +2 101313100 101350600 High Signal Region +2 102828400 102830400 High Signal Region +2 103231300 103232300 High Signal Region +2 103852300 103872800 High Signal Region +2 104684900 104697300 High Signal Region +2 105249300 105259000 High Signal Region +2 105539300 105563200 Low Mappability +2 105825900 105865100 High Signal Region +2 106555100 106569300 High Signal Region +2 107134100 107140900 High Signal Region +2 107593900 107601200 Low Mappability +2 107710100 107712400 High Signal Region +2 108608600 108614000 High Signal Region +2 108945100 108972800 High Signal Region +2 109629400 109636000 High Signal Region +2 110016800 110025500 High Signal Region +2 110091100 110128700 High Signal Region +2 110157100 110163300 High Signal Region +2 110292700 110294600 High Signal Region +2 110545800 110583400 High Signal Region +2 110752400 110780100 High Signal Region +2 111007400 111018600 High Signal Region +2 111042000 111046600 High Signal Region +2 111172700 111179800 High Signal Region +2 111281500 111287900 Low 
Mappability +2 111545600 111553300 Low Mappability +2 111716900 111722900 High Signal Region +2 111844900 111866400 High Signal Region +2 111890900 111898900 High Signal Region +2 112053900 112086000 High Signal Region +2 112319700 112326200 Low Mappability +2 112522900 112570500 High Signal Region +2 112602800 112605100 High Signal Region +2 112701400 112707900 High Signal Region +2 113095800 113102400 Low Mappability +2 113330900 113333000 Low Mappability +2 113518400 113524900 Low Mappability +2 113564300 113565700 High Signal Region +2 113659300 113673200 High Signal Region +2 114180800 114187400 Low Mappability +2 114242400 114244000 High Signal Region +2 114469200 114504000 High Signal Region +2 116454300 116524000 High Signal Region +2 117829600 117835500 High Signal Region +2 118017700 118020200 High Signal Region +2 120608600 120650200 High Signal Region +2 120810300 120821000 High Signal Region +2 121435600 121523600 High Signal Region +2 121938800 121957600 High Signal Region +2 122680400 122683200 High Signal Region +2 123288000 123294300 Low Mappability +2 123496800 123525300 High Signal Region +2 123785200 123790700 High Signal Region +2 124002700 124004600 High Signal Region +2 124798800 124835800 High Signal Region +2 125625000 125635900 Low Mappability +2 126217400 126263800 High Signal Region +2 126445400 126447400 Low Mappability +2 126964900 126972100 Low Mappability +2 127720400 127734000 Low Mappability +2 128050800 128053200 High Signal Region +2 128480400 128486900 Low Mappability +2 128772500 128774500 Low Mappability +2 129499400 129523400 High Signal Region +2 129602700 129613700 Low Mappability +2 131791800 131793800 High Signal Region +2 131908300 131931100 Low Mappability +2 131963900 131983700 High Signal Region +2 132885700 132890400 High Signal Region +2 132952400 132954500 Low Mappability +2 133053200 133083400 High Signal Region +2 133239300 133261800 High Signal Region +2 133934000 133937500 High Signal Region +2 134560100 
134577900 High Signal Region +2 134661800 134673000 High Signal Region +2 134746600 134751100 High Signal Region +2 135146800 135151900 High Signal Region +2 135987600 135989700 High Signal Region +2 136234300 136286800 Low Mappability +2 137028200 137037000 High Signal Region +2 137345900 137369900 High Signal Region +2 137394500 137405600 High Signal Region +2 137640000 137642300 High Signal Region +2 137890200 137895000 High Signal Region +2 138035000 138056400 Low Mappability +2 138573700 138580400 High Signal Region +2 138621500 138624200 High Signal Region +2 138833600 138853100 High Signal Region +2 138904300 138935000 High Signal Region +2 139433200 139476200 High Signal Region +2 140345800 140352400 Low Mappability +2 142197000 142204400 Low Mappability +2 142464200 142483300 Low Mappability +2 142789100 142795600 Low Mappability +2 143275500 143290300 High Signal Region +2 143725900 143764700 High Signal Region +2 144627800 144636700 Low Mappability +2 144975200 144977100 High Signal Region +2 145001300 145003200 High Signal Region +2 145118300 145146300 Low Mappability +2 145236800 145242600 Low Mappability +2 145625100 145630800 Low Mappability +2 145732700 145734600 High Signal Region +2 146135700 146176900 High Signal Region +2 146995700 147013200 Low Mappability +2 147675300 147677500 High Signal Region +2 147864800 147871300 High Signal Region +2 147918800 147925100 Low Mappability +2 148410500 148416000 Low Mappability +2 148459900 148473800 High Signal Region +2 148612700 148620200 Low Mappability +2 148939300 148984200 High Signal Region +2 149049800 149056000 High Signal Region +2 149269400 149292700 High Signal Region +2 150413500 150452500 High Signal Region +2 150728300 150749700 Low Mappability +2 151029700 151385300 High Signal Region +2 151408800 151496700 High Signal Region +2 152157000 152159000 Low Mappability +2 152206800 152227500 High Signal Region +2 152263400 152269900 Low Mappability +2 153674800 153693100 Low Mappability +2 
154174200 154180000 High Signal Region +2 154353800 154359700 Low Mappability +2 155016300 155051500 High Signal Region +2 155235400 155258100 High Signal Region +2 156185100 156214400 Low Mappability +2 157566000 157655300 Low Mappability +2 157833200 157835600 High Signal Region +2 158286300 158292800 High Signal Region +2 159455200 159469500 High Signal Region +2 160620300 160638500 High Signal Region +2 161368800 161376200 High Signal Region +2 161984900 161990900 High Signal Region +2 162369100 162376700 High Signal Region +2 162594500 162602700 High Signal Region +2 162843800 162847600 High Signal Region +2 163519100 163533100 Low Mappability +2 163644500 163655100 High Signal Region +2 163788900 163796100 Low Mappability +2 163833800 163849200 Low Mappability +2 163958100 163963000 Low Mappability +2 164201000 164202700 High Signal Region +2 165477300 165529900 Low Mappability +2 165675100 165679500 Low Mappability +2 165848700 165953000 Low Mappability +2 166530600 166535100 Low Mappability +2 166780500 166832200 Low Mappability +2 167269400 167291100 High Signal Region +2 167407900 167423000 Low Mappability +2 170315100 170320000 High Signal Region +2 170503800 170509800 High Signal Region +2 171814300 171816700 High Signal Region +2 171912800 171932200 Low Mappability +2 172007100 172014300 High Signal Region +2 172743600 172751100 Low Mappability +2 173098700 173101000 Low Mappability +2 173706700 173708800 High Signal Region +2 174961800 176745500 High Signal Region +2 176767100 177166600 High Signal Region +2 177232400 177490200 High Signal Region +2 177526700 177841000 High Signal Region +2 178775000 178794400 High Signal Region +2 180025600 180093500 Low Mappability +2 181169900 181188000 Low Mappability +2 181285900 181298800 High Signal Region +2 181739800 181745800 High Signal Region +2 181885000 181933400 High Signal Region +2 182003800 182113200 High Signal Region +3 0 3052500 High Signal Region +3 3084100 3098300 High Signal Region +3 3123200 
3150800 High Signal Region +3 3443300 3493700 High Signal Region +3 4698100 4725500 High Signal Region +3 5517700 5525000 Low Mappability +3 5859400 5863500 High Signal Region +3 6115100 6117100 High Signal Region +3 6601900 6627400 High Signal Region +3 6900700 6916400 High Signal Region +3 6941100 6946600 High Signal Region +3 7178300 7223900 High Signal Region +3 7477600 7482500 High Signal Region +3 7910300 7916600 High Signal Region +3 8225200 8247500 High Signal Region +3 8574000 8589900 High Signal Region +3 8815300 8838700 High Signal Region +3 9091900 9096900 Low Mappability +3 9777500 9778500 High Signal Region +3 9904100 9910700 High Signal Region +3 9952100 9967100 High Signal Region +3 10453800 10464500 High Signal Region +3 10961700 10971700 High Signal Region +3 11050200 11070500 High Signal Region +3 11120700 11143300 High Signal Region +3 11518700 11524700 High Signal Region +3 11779200 11806000 High Signal Region +3 11933500 11938400 High Signal Region +3 11961500 11973100 High Signal Region +3 12107500 12131400 High Signal Region +3 12221200 12262000 High Signal Region +3 12336000 12339700 High Signal Region +3 12814500 12857800 Low Mappability +3 12906200 12907300 High Signal Region +3 13219400 13222800 High Signal Region +3 13821100 13826600 Low Mappability +3 13965800 13972000 High Signal Region +3 14272100 14336300 High Signal Region +3 14449600 14478500 High Signal Region +3 14593200 14597400 High Signal Region +3 14668900 14744700 High Signal Region +3 15028800 15045100 High Signal Region +3 15079500 15087400 High Signal Region +3 15451600 15872400 High Signal Region +3 15964200 15967200 High Signal Region +3 16351400 16357100 High Signal Region +3 16626000 16633700 High Signal Region +3 16995700 17021400 High Signal Region +3 17419700 17447600 High Signal Region +3 17679600 17682100 High Signal Region +3 17954200 17997400 High Signal Region +3 18379800 18395100 High Signal Region +3 18432100 18437500 High Signal Region +3 18966900 18983600 
High Signal Region +3 19357600 19359300 High Signal Region +3 19594900 19601100 High Signal Region +3 19917700 19940300 High Signal Region +3 21247500 21250200 High Signal Region +3 21317800 21324600 High Signal Region +3 21383700 21389000 High Signal Region +3 21512900 21519300 High Signal Region +3 21661800 21663700 Low Mappability +3 21685300 21709500 High Signal Region +3 22069200 22070500 High Signal Region +3 22240800 22250100 High Signal Region +3 22362000 22377000 High Signal Region +3 22517600 22521100 High Signal Region +3 22612100 22759200 High Signal Region +3 22933800 23015000 High Signal Region +3 23077300 23099800 High Signal Region +3 23173700 23180900 Low Mappability +3 23302200 23321100 High Signal Region +3 23353500 23360000 High Signal Region +3 23463300 23468200 High Signal Region +3 23579500 23584900 High Signal Region +3 23841700 23843800 Low Mappability +3 24624400 24627900 High Signal Region +3 24655200 24661300 High Signal Region +3 25210800 25228800 Low Mappability +3 25277500 25310400 High Signal Region +3 25416900 25421600 Low Mappability +3 25472900 25478900 High Signal Region +3 26089400 26113400 High Signal Region +3 26346800 26369700 High Signal Region +3 26724600 26737000 High Signal Region +3 26944500 26950800 High Signal Region +3 27010100 27023300 High Signal Region +3 27309300 27319800 Low Mappability +3 28198300 28201300 Low Mappability +3 28513900 28535500 High Signal Region +3 28983500 29014200 High Signal Region +3 29461500 29492300 High Signal Region +3 29675900 29680600 High Signal Region +3 31176300 31188900 Low Mappability +3 31340700 31364500 Low Mappability +3 31651800 31680100 High Signal Region +3 31819800 31826900 High Signal Region +3 33696500 33708400 High Signal Region +3 33768300 33798500 High Signal Region +3 33930000 33948800 Low Mappability +3 34516200 34518200 High Signal Region +3 35285400 35292700 High Signal Region +3 35707000 35713500 Low Mappability +3 35743300 35744600 High Signal Region +3 36106500 
36109400 High Signal Region +3 36285400 36291100 High Signal Region +3 36847300 36853900 High Signal Region +3 39026800 39030900 High Signal Region +3 39183300 39189800 High Signal Region +3 40151300 40157700 High Signal Region +3 40347600 40352600 High Signal Region +3 40549300 40651700 High Signal Region +3 41871900 41887800 High Signal Region +3 41993500 41999500 High Signal Region +3 42170000 42187300 High Signal Region +3 42682100 42722800 High Signal Region +3 42820200 42827400 High Signal Region +3 43108100 43197200 High Signal Region +3 43466400 43492100 High Signal Region +3 43538900 43557700 High Signal Region +3 44185900 44191600 High Signal Region +3 44241200 44260000 High Signal Region +3 44401500 44407500 High Signal Region +3 44559600 44565200 High Signal Region +3 44884400 44890700 High Signal Region +3 45579200 45591900 High Signal Region +3 45848500 45863400 Low Mappability +3 45986000 45990700 High Signal Region +3 46141000 46148200 High Signal Region +3 46338200 46340300 Low Mappability +3 46735000 46741900 High Signal Region +3 46795400 46805400 High Signal Region +3 46910900 46936200 High Signal Region +3 47592800 47598000 High Signal Region +3 47798300 47799600 High Signal Region +3 47966600 47968700 High Signal Region +3 48437800 48462000 High Signal Region +3 49443600 49482800 High Signal Region +3 49727200 49734400 High Signal Region +3 50464900 50474400 High Signal Region +3 50763700 50814900 High Signal Region +3 50957300 50963000 High Signal Region +3 51233600 51245400 Low Mappability +3 51616000 51623700 Low Mappability +3 51765300 51784900 High Signal Region +3 52230000 52233400 High Signal Region +3 53426900 53431000 High Signal Region +3 54849100 54874300 Low Mappability +3 56069700 56075200 High Signal Region +3 56210900 56215900 High Signal Region +3 56513600 56576700 High Signal Region +3 56903800 56943000 High Signal Region +3 57059400 57070200 High Signal Region +3 57349800 57379400 High Signal Region +3 58051100 58081600 Low 
Mappability +3 59370700 59412200 High Signal Region +3 59565300 59632700 High Signal Region +3 59684600 59689200 High Signal Region +3 59791800 59804200 Low Mappability +3 59887400 59889300 High Signal Region +3 59919200 59921100 High Signal Region +3 60044300 60046800 High Signal Region +3 60489700 60495200 Low Mappability +3 61150800 61177900 High Signal Region +3 61260700 61275000 Low Mappability +3 61495400 61499700 High Signal Region +3 61672300 61678300 High Signal Region +3 61707600 61726600 Low Mappability +3 61853900 61858900 High Signal Region +3 62032400 62038600 High Signal Region +3 62108300 62160100 High Signal Region +3 62356900 62367700 High Signal Region +3 62543000 62549200 High Signal Region +3 62873000 62879300 High Signal Region +3 63515500 63530100 High Signal Region +3 63590100 63591500 High Signal Region +3 64171000 64172900 High Signal Region +3 64237900 64245700 High Signal Region +3 64453100 64512800 High Signal Region +3 64609600 64665300 Low Mappability +3 64697900 64730500 High Signal Region +3 67027900 67054100 High Signal Region +3 67262400 67264000 High Signal Region +3 67411100 67419400 High Signal Region +3 67747300 67752800 High Signal Region +3 67786800 67793600 High Signal Region +3 68114300 68119700 Low Mappability +3 68519400 68525100 High Signal Region +3 69228600 69230500 High Signal Region +3 69848400 69854900 High Signal Region +3 69944400 69949800 High Signal Region +3 71117300 71122800 High Signal Region +3 71369600 71447800 High Signal Region +3 72273600 72293700 High Signal Region +3 72698100 72704800 High Signal Region +3 73088300 73098500 High Signal Region +3 73733100 73738500 Low Mappability +3 74583300 74598400 Low Mappability +3 74865000 74881800 High Signal Region +3 75348300 75378700 Low Mappability +3 75409000 75424100 High Signal Region +3 76598800 76604700 High Signal Region +3 76886600 76892900 Low Mappability +3 77597400 77604300 Low Mappability +3 77667400 77711400 High Signal Region +3 77926800 77931400 
High Signal Region +3 78281900 78283900 Low Mappability +3 79012700 79014900 High Signal Region +3 79046300 79052800 Low Mappability +3 79763800 79780000 High Signal Region +3 79959500 79965700 High Signal Region +3 80465400 80472000 High Signal Region +3 82283300 82288700 Low Mappability +3 82462100 82508600 Low Mappability +3 82589000 82616700 Low Mappability +3 82921400 82924800 High Signal Region +3 83123200 83125100 High Signal Region +3 83330900 83343400 High Signal Region +3 83845100 83867000 High Signal Region +3 84142200 84149700 Low Mappability +3 84359000 84366300 Low Mappability +3 85305200 85326800 Low Mappability +3 85622200 85629500 Low Mappability +3 87424200 87426100 High Signal Region +3 87469300 87474600 High Signal Region +3 88044000 88066500 High Signal Region +3 88666500 88673500 Low Mappability +3 88716700 88873000 Low Mappability +3 90761500 90810400 High Signal Region +3 90991100 90996800 Low Mappability +3 91856700 91898200 High Signal Region +3 92185400 92291300 High Signal Region +3 93059200 93107000 High Signal Region +3 93168500 93172800 High Signal Region +3 93203900 93229100 High Signal Region +3 93323700 93331700 Low Mappability +3 93860300 94093700 High Signal Region +3 94136200 94152300 High Signal Region +3 94658300 94665700 Low Mappability +3 94690000 94730800 High Signal Region +3 94757600 94765200 Low Mappability +3 96043600 96058900 High Signal Region +3 96196200 96288300 High Signal Region +3 96313200 96388900 Low Mappability +3 96446800 96463800 Low Mappability +3 96485600 96514300 High Signal Region +3 96840000 96863800 High Signal Region +3 97245200 97251500 High Signal Region +3 98396100 98411400 High Signal Region +3 98443100 98597600 Low Mappability +3 98709300 98778900 High Signal Region +3 98986000 99034100 High Signal Region +3 99406000 99434100 High Signal Region +3 99882900 99908100 High Signal Region +3 99980200 99982200 High Signal Region +3 100315500 100330900 High Signal Region +3 100484400 100486300 High 
Signal Region +3 102813400 102839300 High Signal Region +3 102983600 102989900 High Signal Region +3 103134600 103136000 High Signal Region +3 103427600 103447900 High Signal Region +3 103555000 103557000 Low Mappability +3 104116800 104123100 High Signal Region +3 104194200 104198800 High Signal Region +3 104588100 104595500 Low Mappability +3 105028200 105030500 High Signal Region +3 106118500 106311800 High Signal Region +3 106777900 106779700 High Signal Region +3 109258500 109277300 High Signal Region +3 109458000 109462700 High Signal Region +3 110319800 110325700 High Signal Region +3 110416300 110421800 High Signal Region +3 111256100 111268600 High Signal Region +3 111578400 111605200 Low Mappability +3 111794100 111799000 Low Mappability +3 111830400 111836300 High Signal Region +3 112274500 112287300 High Signal Region +3 112315500 112337400 High Signal Region +3 112561900 112586900 High Signal Region +3 112863500 112869300 High Signal Region +3 112913800 112918000 High Signal Region +3 113186300 113189100 High Signal Region +3 113250900 113527800 High Signal Region +3 113709900 113719000 High Signal Region +3 113742300 113748300 High Signal Region +3 114272600 114279400 High Signal Region +3 114472100 114499300 Low Mappability +3 114587900 114595900 High Signal Region +3 114976700 114982800 High Signal Region +3 115020700 115027100 Low Mappability +3 115367700 115372200 Low Mappability +3 115905900 115922900 High Signal Region +3 116817400 116843900 Low Mappability +3 117267200 117292400 High Signal Region +3 117379100 117386400 Low Mappability +3 118055100 118060000 High Signal Region +3 119211800 119212900 High Signal Region +3 120735000 120742200 High Signal Region +3 120825200 120851500 High Signal Region +3 121248900 121250900 High Signal Region +3 121694400 121696100 High Signal Region +3 122294000 122329300 High Signal Region +3 122654100 122657300 High Signal Region +3 122804300 122806600 High Signal Region +3 123471600 123476200 Low Mappability 
+3 123729200 123743200 High Signal Region +3 123924800 123957700 High Signal Region +3 124282300 124288300 High Signal Region +3 125902800 125908900 High Signal Region +3 126127300 126136000 Low Mappability +3 126905300 126910600 High Signal Region +3 127522400 127523700 Low Mappability +3 127771600 127780600 High Signal Region +3 128203600 128211000 High Signal Region +3 128440100 128446100 High Signal Region +3 128935800 128937700 High Signal Region +3 129020900 129032100 High Signal Region +3 129393000 129394900 High Signal Region +3 133123600 133130800 Low Mappability +3 133566400 133568700 High Signal Region +3 133636000 133642800 High Signal Region +3 133837100 133859400 High Signal Region +3 134007400 134026700 Low Mappability +3 134685700 134690700 High Signal Region +3 134862500 134888400 High Signal Region +3 135148300 135163000 High Signal Region +3 136173700 136181000 Low Mappability +3 137407500 137413500 High Signal Region +3 137469200 137470300 High Signal Region +3 138200900 138207900 High Signal Region +3 139365700 139417700 High Signal Region +3 140376900 140384200 Low Mappability +3 142190700 142192800 High Signal Region +3 142513000 142517200 High Signal Region +3 143840800 143847000 High Signal Region +3 144030200 144036300 High Signal Region +3 144655600 144660600 High Signal Region +3 145040500 145061800 High Signal Region +3 145109000 145114400 Low Mappability +3 145188100 145190400 High Signal Region +3 145301600 145303100 High Signal Region +3 146073300 146102400 High Signal Region +3 146358800 146362600 High Signal Region +3 146476200 146479000 High Signal Region +3 146918900 146924200 High Signal Region +3 147107400 147113000 High Signal Region +3 147769500 147781800 High Signal Region +3 147874500 147877600 High Signal Region +3 148704800 148716900 High Signal Region +3 148750100 148757400 Low Mappability +3 148797800 148799700 High Signal Region +3 149051500 149053800 High Signal Region +3 150120900 150123800 High Signal Region +3 
150336900 150341400 Low Mappability +3 151028900 151031200 High Signal Region +3 151657500 151679800 High Signal Region +3 152313800 152332200 High Signal Region +3 152700700 152702700 High Signal Region +3 153090100 153109400 High Signal Region +3 154640300 154646700 High Signal Region +3 154931700 154932800 High Signal Region +3 155515800 155517600 High Signal Region +3 155765900 155771900 High Signal Region +3 156256900 156262800 Low Mappability +3 156285600 156322500 High Signal Region +3 156799400 156804900 Low Mappability +3 157646900 157678300 High Signal Region +3 157946200 157969400 High Signal Region +3 158095300 158119200 High Signal Region +3 158698600 158756800 High Signal Region +3 159165900 159179700 High Signal Region +3 159225800 159239300 Low Mappability +3 159478300 159479700 High Signal Region +3 159748800 159826500 High Signal Region +3 159938500 160039600 High Signal Region +4 0 3114800 High Signal Region +4 3139700 3333100 High Signal Region +4 18476200 18498400 High Signal Region +4 20168700 20213200 High Signal Region +4 20804100 20808300 High Signal Region +4 20982300 20983700 High Signal Region +4 21281300 21287700 High Signal Region +4 22535900 22542300 High Signal Region +4 24193400 24201100 High Signal Region +4 25471300 25473200 High Signal Region +4 28175900 28177900 High Signal Region +4 31353200 31355200 High Signal Region +4 34934800 34936700 High Signal Region +4 35042700 35048900 High Signal Region +4 38305900 38322000 High Signal Region +4 57979700 57981800 High Signal Region +4 64454600 64499000 High Signal Region +4 68427300 68447900 High Signal Region +4 70367200 70379200 High Signal Region +4 73196300 73209300 High Signal Region +4 80001800 80004900 High Signal Region +4 83536900 83541900 High Signal Region +4 90725600 90727500 High Signal Region +4 92230800 92236500 High Signal Region +4 93843500 93853100 High Signal Region +4 99380500 99382400 High Signal Region +4 110469700 110505300 High Signal Region +4 118546100 
118549600 High Signal Region +4 131222500 131229300 High Signal Region +4 145404200 147840400 High Signal Region +4 149809200 149811700 High Signal Region +4 153152100 153154100 High Signal Region +4 156256000 156508100 High Signal Region +5 3175400 3186000 High Signal Region +5 12489500 12490600 High Signal Region +5 14899000 15726800 High Signal Region +5 17466700 17481500 High Signal Region +5 36629400 36662500 High Signal Region +5 46434800 46436700 High Signal Region +5 49722200 49755700 High Signal Region +5 60041900 60043900 Low Mappability +5 80499900 80501900 High Signal Region +5 93288700 93351800 High Signal Region +5 106126300 106177800 High Signal Region +5 110063700 110075500 High Signal Region +5 114921500 114923500 High Signal Region +5 137148800 137153800 High Signal Region +5 146260000 146262300 High Signal Region +5 151733600 151834600 High Signal Region +6 0 3255700 High Signal Region +6 3280700 3340300 High Signal Region +6 4922900 4925100 High Signal Region +6 5608000 5657900 High Signal Region +6 5704400 5706800 High Signal Region +6 6400000 6442800 High Signal Region +6 6700000 6727600 High Signal Region +6 8729200 8731100 High Signal Region +6 8906700 8932300 High Signal Region +6 9519200 9529100 High Signal Region +6 9580600 9610100 High Signal Region +6 9646900 9663400 High Signal Region +6 9720400 9733100 High Signal Region +6 9889000 9891100 High Signal Region +6 10228400 10269900 High Signal Region +6 10559100 10588400 High Signal Region +6 10623400 10633900 High Signal Region +6 11251100 11256800 High Signal Region +6 11406400 11457900 High Signal Region +6 11813900 11897100 High Signal Region +6 12671100 12680300 High Signal Region +6 13390500 13394900 High Signal Region +6 13700500 13743100 High Signal Region +6 14085000 14092300 Low Mappability +6 14793800 14805500 High Signal Region +6 14929200 14935100 High Signal Region +6 16299700 16310100 High Signal Region +6 16922600 16924800 High Signal Region +6 17004600 17042000 High 
Signal Region +6 17391200 17397900 High Signal Region +6 17981700 17983400 High Signal Region +6 18264800 18267200 High Signal Region +6 18836700 18848600 High Signal Region +6 19068900 19075400 High Signal Region +6 20113900 20143500 High Signal Region +6 21452400 21458100 High Signal Region +6 21801300 21803200 High Signal Region +6 21841300 21845300 High Signal Region +6 21873300 21876800 High Signal Region +6 22107700 22131800 High Signal Region +6 22479600 22483900 High Signal Region +6 22516700 22534300 High Signal Region +6 25505600 25566400 Low Mappability +6 26049500 26072100 High Signal Region +6 26247700 26278000 High Signal Region +6 26834800 26840700 High Signal Region +6 26988500 26992000 High Signal Region +6 27199000 27228400 High Signal Region +6 28924100 28929500 Low Mappability +6 29746800 29750000 High Signal Region +6 29974300 29978200 High Signal Region +6 30752800 30806400 High Signal Region +6 30929300 30936100 Low Mappability +6 31594900 31597200 High Signal Region +6 32740700 32746800 High Signal Region +6 32867600 32869000 High Signal Region +6 33490300 33495000 High Signal Region +6 33650500 33665400 High Signal Region +6 33743900 33749000 High Signal Region +6 36224300 36230500 High Signal Region +6 40535500 40559800 Low Mappability +6 40716600 40723700 High Signal Region +6 42122800 42174200 High Signal Region +6 42492600 42516600 High Signal Region +6 42617600 42620900 High Signal Region +6 44265200 44270800 High Signal Region +6 44497000 44513300 High Signal Region +6 44785200 44794100 High Signal Region +6 44836300 44837500 High Signal Region +6 46381300 46402000 High Signal Region +6 46678600 46685300 High Signal Region +6 47639000 47779200 High Signal Region +6 48120300 48122300 High Signal Region +6 48149300 48172900 High Signal Region +6 48231500 48292600 High Signal Region +6 48320300 48347000 High Signal Region +6 49235500 49237500 High Signal Region +6 50601400 50636700 Low Mappability +6 51046500 51048400 High Signal Region 
+6 53464100 53487500 Low Mappability +6 54976500 54993700 High Signal Region +6 56232700 56257500 High Signal Region +6 56455900 56465300 High Signal Region +6 57425200 57455700 High Signal Region +6 57588900 57634500 High Signal Region +6 57919500 57925700 High Signal Region +6 58068500 58073500 High Signal Region +6 58588700 58612800 High Signal Region +6 59123600 59130100 High Signal Region +6 59199600 59230600 High Signal Region +6 59584300 59598000 High Signal Region +6 59676000 59698200 High Signal Region +6 60622400 60625600 High Signal Region +6 60668000 60688200 High Signal Region +6 61023100 61029400 High Signal Region +6 61088400 61094600 High Signal Region +6 62525500 62527300 High Signal Region +6 64331600 64338900 Low Mappability +6 64778500 64812500 High Signal Region +6 64882100 64930500 High Signal Region +6 65100600 65106700 High Signal Region +6 65184300 65261600 High Signal Region +6 66070200 66095900 High Signal Region +6 66815600 66831600 High Signal Region +6 67311500 67312900 High Signal Region +6 67494800 67522100 Low Mappability +6 67576400 67630800 High Signal Region +6 67658300 67710900 High Signal Region +6 68011000 68012900 High Signal Region +6 68221900 68252400 Low Mappability +6 68641400 68661300 High Signal Region +6 68971900 68996400 High Signal Region +6 69017600 69035700 High Signal Region +6 70000300 70053000 High Signal Region +6 70187800 70213700 High Signal Region +6 70620700 70648600 High Signal Region +6 73105700 73113400 High Signal Region +6 73502200 73521000 High Signal Region +6 73671400 73672600 High Signal Region +6 74191700 74194400 High Signal Region +6 74365900 74386400 High Signal Region +6 74700100 74705300 High Signal Region +6 75054000 75083000 High Signal Region +6 76645400 76649100 High Signal Region +6 76847200 76854100 High Signal Region +6 78352900 78359500 High Signal Region +6 78456200 78491700 Low Mappability +6 78637400 78639700 High Signal Region +6 78716700 78722400 High Signal Region +6 79627500 
79635200 High Signal Region +6 79817300 79819200 High Signal Region +6 79898900 79922800 Low Mappability +6 79959800 79967500 Low Mappability +6 81012200 81036700 High Signal Region +6 81829400 81875000 High Signal Region +6 81997000 82011600 High Signal Region +6 82213400 82218800 High Signal Region +6 84662700 84688200 High Signal Region +6 84712600 84720200 High Signal Region +6 89723500 89735600 High Signal Region +6 91768300 91770200 High Signal Region +6 92321600 92328300 High Signal Region +6 94988600 94990700 Low Mappability +6 95030100 95043800 Low Mappability +6 95475600 95479900 High Signal Region +6 95980800 95987100 High Signal Region +6 96877800 96896100 High Signal Region +6 97356800 97379400 High Signal Region +6 101571200 101621400 High Signal Region +6 102379600 102384100 High Signal Region +6 102483000 102505700 High Signal Region +6 102767600 102791400 High Signal Region +6 103313700 103315600 High Signal Region +6 103647900 103650200 High Signal Region +6 103750700 103752000 High Signal Region +6 105194700 105199600 High Signal Region +6 105253400 105257600 Low Mappability +6 105306000 105337600 High Signal Region +6 107141500 107146300 High Signal Region +6 107284300 107299800 High Signal Region +6 107860500 107920500 High Signal Region +6 109498200 109506200 High Signal Region +6 109641800 109648100 High Signal Region +6 109984000 110013000 High Signal Region +6 114340600 114343000 High Signal Region +6 114492200 114643400 High Signal Region +6 116021200 116043900 High Signal Region +6 116238700 116252600 High Signal Region +6 116566200 116593800 High Signal Region +6 117087400 117094300 High Signal Region +6 118209000 118234000 High Signal Region +6 119419600 119431100 High Signal Region +6 121690100 121703800 High Signal Region +6 122614200 122616600 High Signal Region +6 123132100 123179400 High Signal Region +6 123204800 123242900 High Signal Region +6 126135200 126137300 Low Mappability +6 128680200 128693700 High Signal Region +6 
128861200 128865300 High Signal Region +6 129857800 129863300 High Signal Region +6 129935700 129948400 High Signal Region +6 131088300 131114900 High Signal Region +6 131208300 131252100 High Signal Region +6 131495900 131505900 High Signal Region +6 132497200 132523000 Low Mappability +6 132597000 132598700 High Signal Region +6 132635400 132642000 High Signal Region +6 133169000 133170900 High Signal Region +6 133891500 133899800 High Signal Region +6 134689500 134692700 High Signal Region +6 138216100 138221900 High Signal Region +6 138647300 138649100 High Signal Region +6 138685400 138700700 High Signal Region +6 142060700 142079300 High Signal Region +6 142396700 142400200 Low Mappability +6 142433400 142439400 High Signal Region +6 143014400 143016300 High Signal Region +6 143466500 143481400 High Signal Region +6 143883500 143886900 High Signal Region +6 144655200 144670000 High Signal Region +6 145784700 145787000 High Signal Region +6 145931800 145933900 Low Mappability +6 146018900 146080500 High Signal Region +6 147077200 147079900 High Signal Region +6 147459800 147465000 Low Mappability +6 147549600 147555000 Low Mappability +6 147881900 147908400 High Signal Region +6 148013100 148038400 High Signal Region +6 148121800 148124500 High Signal Region +6 148635700 148640300 Low Mappability +6 148662900 148665000 Low Mappability +6 149585500 149736500 High Signal Region +7 4558200 4594300 High Signal Region +7 4648600 4651500 High Signal Region +7 5153200 5244900 High Signal Region +7 5588700 5591600 High Signal Region +7 6050500 6056000 High Signal Region +7 6249400 6251400 High Signal Region +7 6590800 6597400 High Signal Region +7 7209500 7231000 High Signal Region +7 7273500 7327400 High Signal Region +7 7527500 7533900 High Signal Region +7 7556800 8278400 High Signal Region +7 8490800 9968800 High Signal Region +7 9992100 9998900 High Signal Region +7 10314900 10320900 High Signal Region +7 11097700 11123700 High Signal Region +7 11271100 11438600 
High Signal Region +7 12009500 12084600 High Signal Region +7 12379600 12385400 High Signal Region +7 12526600 12548100 High Signal Region +7 13112300 13118100 High Signal Region +7 13591200 13620200 High Signal Region +7 14051300 14055900 High Signal Region +7 14767700 14823800 High Signal Region +7 14930100 15023000 High Signal Region +7 15128800 15623000 High Signal Region +7 16661400 16667800 High Signal Region +7 17112200 17123900 High Signal Region +7 17215800 17323400 High Signal Region +7 17800000 17806700 High Signal Region +7 17829700 17862600 High Signal Region +7 18487100 18493200 High Signal Region +7 19032600 19034500 High Signal Region +7 20799700 21103900 High Signal Region +7 21135700 23286800 High Signal Region +7 23494700 23503600 High Signal Region +7 24026200 24031700 High Signal Region +7 24103800 24108200 High Signal Region +7 24729400 24731300 High Signal Region +7 26022700 26066900 High Signal Region +7 26779000 26780900 High Signal Region +7 27082300 27098300 High Signal Region +7 27712800 27732500 High Signal Region +7 31365500 31387000 High Signal Region +7 31818200 31876700 High Signal Region +7 31934500 32043100 High Signal Region +7 32215700 32235200 High Signal Region +7 32629300 33098700 High Signal Region +7 33124200 33198000 High Signal Region +7 33949500 34004800 High Signal Region +7 34957200 34959100 High Signal Region +7 38396600 38787200 High Signal Region +7 38839800 39181000 High Signal Region +7 39227600 39404100 High Signal Region +7 39874600 39875900 High Signal Region +7 41791900 41851900 High Signal Region +7 43123800 43220300 High Signal Region +7 44737800 44739900 High Signal Region +7 47175100 47188600 High Signal Region +7 47414400 47519700 High Signal Region +7 48102600 48135800 High Signal Region +7 50940400 50986800 High Signal Region +7 51329800 51335900 High Signal Region +7 51800300 51812600 High Signal Region +7 51909200 51911200 High Signal Region +7 52095700 52104400 High Signal Region +7 52283300 52288900 
High Signal Region +7 53677100 53683100 High Signal Region +7 53977800 54027400 High Signal Region +7 54336000 54351800 High Signal Region +7 54808900 54810100 High Signal Region +7 54923000 54971200 High Signal Region +7 55011500 55016500 High Signal Region +7 55080000 55086300 High Signal Region +7 55115400 55141000 High Signal Region +7 55657400 55667100 High Signal Region +7 56062300 56081700 High Signal Region +7 56160100 56163400 Low Mappability +7 56660300 56693600 High Signal Region +7 57367200 57374700 High Signal Region +7 58040300 58077100 High Signal Region +7 58161700 58177900 High Signal Region +7 59673100 59910900 High Signal Region +7 60209400 60215600 High Signal Region +7 60676300 60682800 High Signal Region +7 61320100 61395400 High Signal Region +7 62135200 62137500 High Signal Region +7 62651400 62693400 High Signal Region +7 63272500 63287100 High Signal Region +7 63431300 63432400 High Signal Region +7 63803700 63810800 High Signal Region +7 63908200 63910100 High Signal Region +7 64072600 64134600 High Signal Region +7 64465300 64496400 High Signal Region +7 64601000 64617900 High Signal Region +7 65187500 65198300 High Signal Region +7 68534700 68537900 High Signal Region +7 68775900 68778100 High Signal Region +7 69086500 69102900 High Signal Region +7 69785300 69792200 High Signal Region +7 70757900 70765000 High Signal Region +7 71971100 71984500 High Signal Region +7 72317400 72337900 High Signal Region +7 72630000 72679900 High Signal Region +7 73212000 73218800 High Signal Region +7 73671700 73680000 High Signal Region +7 75003200 75007700 High Signal Region +7 76067800 76079300 High Signal Region +7 76556000 76573000 High Signal Region +7 76703900 76708400 High Signal Region +7 77520600 77526000 High Signal Region +7 78416900 78422400 High Signal Region +7 80708100 80730100 Low Mappability +7 80787500 80813800 High Signal Region +7 81756100 81760500 High Signal Region +7 82770300 82772800 High Signal Region +7 85017700 85023600 High 
Signal Region +7 85757200 85768800 High Signal Region +7 86118700 86125800 High Signal Region +7 86497400 86503500 High Signal Region +7 86532600 86534000 High Signal Region +7 86805600 86807500 High Signal Region +7 87989300 88000600 High Signal Region +7 89683300 89704600 High Signal Region +7 90087300 90089400 High Signal Region +7 90441000 90442900 High Signal Region +7 91741500 91747500 High Signal Region +7 93259400 93278100 High Signal Region +7 93699600 93717500 High Signal Region +7 93744000 93766100 High Signal Region +7 93969600 93973700 High Signal Region +7 94293000 94299300 High Signal Region +7 94822500 94848800 High Signal Region +7 95177200 95193600 High Signal Region +7 95527400 95533200 High Signal Region +7 97795000 97797300 High Signal Region +7 103100800 103115000 High Signal Region +7 103195500 103202100 High Signal Region +7 103483000 103487500 High Signal Region +7 104097400 104126600 High Signal Region +7 104476800 104477900 High Signal Region +7 104770000 104801200 High Signal Region +7 105830300 106325300 High Signal Region +7 106979000 106984900 High Signal Region +7 107245200 107271400 High Signal Region +7 108780600 108789800 High Signal Region +7 110058500 110061600 High Signal Region +7 111228400 111230600 High Signal Region +7 112636600 112639800 High Signal Region +7 116432200 116453400 High Signal Region +7 119739900 119742100 High Signal Region +7 119795700 119797700 High Signal Region +7 119998800 120015100 High Signal Region +7 124522300 124528300 High Signal Region +7 125009800 125016600 High Signal Region +7 128171000 128189300 High Signal Region +7 130054200 130055700 High Signal Region +7 130591400 130596900 High Signal Region +7 130833500 130835600 High Signal Region +7 134100500 134107200 High Signal Region +7 134329200 134335200 High Signal Region +7 135006900 135008800 High Signal Region +7 135337800 135340900 High Signal Region +7 138590500 138594500 High Signal Region +7 139447400 139448900 High Signal Region +7 
140288200 140307300 High Signal Region +7 140551100 140558800 High Signal Region +7 140580500 140585700 High Signal Region +7 141637000 141640700 High Signal Region +7 142828900 142845000 High Signal Region +7 145340000 145441400 High Signal Region +8 3753500 3779100 High Signal Region +8 14305800 14308200 High Signal Region +8 15508900 15521000 High Signal Region +8 19671800 19937800 High Signal Region +8 19960800 20868000 High Signal Region +8 20945500 20963700 High Signal Region +8 23085600 23096700 High Signal Region +8 35134000 35135900 High Signal Region +8 39132400 39157700 High Signal Region +8 55111200 55397300 High Signal Region +8 69416700 69597900 High Signal Region +8 71432100 71434100 High Signal Region +8 71796100 71863300 High Signal Region +8 73318700 73320700 High Signal Region +8 83755800 83757900 High Signal Region +8 114436000 114437900 High Signal Region +8 123537300 123638300 High Signal Region +8 125778100 125780100 High Signal Region +8 129272900 129401200 High Signal Region +9 0 3053100 High Signal Region +9 3240200 3259800 High Signal Region +9 3302000 3336000 High Signal Region +9 3461000 3466600 Low Mappability +9 3627400 3699700 Low Mappability +9 3802100 3806700 High Signal Region +9 3881100 3887600 High Signal Region +9 4238700 4245700 Low Mappability +9 4375700 4406800 High Signal Region +9 5248000 5254100 High Signal Region +9 5276200 5284600 Low Mappability +9 6431500 6467200 High Signal Region +9 6742900 6806200 Low Mappability +9 7294600 7300700 High Signal Region +9 7370900 7412600 Low Mappability +9 7520900 7525900 High Signal Region +9 8029400 8067100 Low Mappability +9 8275900 8292300 Low Mappability +9 8447200 8483700 High Signal Region +9 8628200 8633700 Low Mappability +9 8859900 8865500 High Signal Region +9 9598800 9626700 High Signal Region +9 9846900 9891900 Low Mappability +9 10193200 10198800 Low Mappability +9 10701300 10707400 High Signal Region +9 10964200 10970600 High Signal Region +9 11341900 11345100 High 
Signal Region +9 11722300 11747100 High Signal Region +9 11792800 11798400 Low Mappability +9 11821400 11845400 High Signal Region +9 12282000 12287500 High Signal Region +9 12364900 12379600 High Signal Region +9 12469100 12472900 Low Mappability +9 12768200 12773800 High Signal Region +9 12840100 12851100 High Signal Region +9 12917600 12922300 High Signal Region +9 12998400 13045600 Low Mappability +9 13324200 13426100 High Signal Region +9 13533500 13535700 High Signal Region +9 13994600 13996700 High Signal Region +9 14410500 14429300 Low Mappability +9 15123900 15136900 High Signal Region +9 16607400 16691900 Low Mappability +9 16833700 16861000 High Signal Region +9 16939400 16950500 Low Mappability +9 17059000 17088000 High Signal Region +9 17197900 17207600 High Signal Region +9 17261400 17263400 Low Mappability +9 17387200 17406200 High Signal Region +9 17525800 17527700 High Signal Region +9 17632000 17636100 High Signal Region +9 17916200 17919600 High Signal Region +9 18010000 18015600 High Signal Region +9 18117000 18162200 Low Mappability +9 18235100 18270100 High Signal Region +9 18893800 18900100 High Signal Region +9 18980400 18994100 High Signal Region +9 19268700 19294700 High Signal Region +9 19595400 19638400 High Signal Region +9 19720500 19725500 Low Mappability +9 19901400 19906100 High Signal Region +9 20183600 20196700 Low Mappability +9 20322100 20407900 High Signal Region +9 21879200 21928200 High Signal Region +9 22116600 22191600 High Signal Region +9 22699500 22731700 High Signal Region +9 22892700 22926500 Low Mappability +9 22947900 22956900 High Signal Region +9 23508700 23526900 High Signal Region +9 24523300 24576000 High Signal Region +9 25596700 25602700 High Signal Region +9 25842900 25863600 High Signal Region +9 26096100 26103500 Low Mappability +9 26700800 26708000 High Signal Region +9 26904600 26911000 High Signal Region +9 27212200 27232300 High Signal Region +9 27974400 27981700 High Signal Region +9 29739800 29741800 
Low Mappability +9 30604400 30606300 Low Mappability +9 30641800 30696800 Low Mappability +9 30929800 30931100 High Signal Region +9 32059200 32083600 Low Mappability +9 32353900 32356500 High Signal Region +9 32839200 32846600 Low Mappability +9 32888700 32896000 Low Mappability +9 32953000 32958100 Low Mappability +9 33127100 33161100 Low Mappability +9 33392400 33402700 High Signal Region +9 33949500 33961900 Low Mappability +9 35071200 35091800 High Signal Region +9 35304300 35306500 High Signal Region +9 36235800 36241900 High Signal Region +9 36555000 36569100 High Signal Region +9 37331400 37349500 Low Mappability +9 37441700 37448100 High Signal Region +9 39330900 39359100 High Signal Region +9 39444100 39449600 High Signal Region +9 39835400 39899000 Low Mappability +9 44214200 44235400 Low Mappability +9 44305700 44408400 Low Mappability +9 47957400 47959300 High Signal Region +9 50082000 50088400 High Signal Region +9 51667400 51673700 High Signal Region +9 52601800 52617200 High Signal Region +9 52749000 52756100 High Signal Region +9 53089800 53107000 High Signal Region +9 53804100 53805400 High Signal Region +9 54916200 54928900 High Signal Region +9 55070600 55078000 Low Mappability +9 55150300 55152300 High Signal Region +9 55936900 55972500 High Signal Region +9 56222700 56224800 High Signal Region +9 56259500 56284300 High Signal Region +9 56991700 56993700 Low Mappability +9 57408000 57434800 High Signal Region +9 58766500 58785800 High Signal Region +9 59046200 59052700 Low Mappability +9 59103800 59125000 High Signal Region +9 60538500 60551200 High Signal Region +9 60726100 60733500 High Signal Region +9 61721500 61723400 High Signal Region +9 62811600 62868300 Low Mappability +9 64236700 64255000 Low Mappability +9 64410400 64417700 Low Mappability +9 65292600 65314200 High Signal Region +9 65867400 65909400 High Signal Region +9 67198600 67205000 Low Mappability +9 68451200 68461200 High Signal Region +9 68527100 68534600 High Signal Region 
+9 71080600 71120800 Low Mappability +9 71421100 71434600 High Signal Region +9 72895800 72900800 Low Mappability +9 72957900 72985700 Low Mappability +9 73285500 73311300 High Signal Region +9 73396800 73412500 Low Mappability +9 73861400 73863500 Low Mappability +9 73935600 73946700 High Signal Region +9 74615600 74641300 Low Mappability +9 74664800 74690900 High Signal Region +9 74768600 74774600 High Signal Region +9 75709200 75736000 Low Mappability +9 77079900 77082800 High Signal Region +9 77152800 77158800 High Signal Region +9 77972400 77974300 High Signal Region +9 78175200 78182700 Low Mappability +9 78230500 78296900 High Signal Region +9 78554700 78589200 Low Mappability +9 78755200 78757800 High Signal Region +9 78819200 78830500 Low Mappability +9 80234500 80235700 High Signal Region +9 80660700 80665600 High Signal Region +9 81251500 81303200 High Signal Region +9 81614000 81620700 High Signal Region +9 81906400 81937200 High Signal Region +9 83278800 83288100 High Signal Region +9 83558300 83560200 High Signal Region +9 83935500 83950000 High Signal Region +9 83992400 83998900 High Signal Region +9 84211900 84226800 High Signal Region +9 85898900 85918900 High Signal Region +9 86062600 86070000 Low Mappability +9 86120100 86137500 High Signal Region +9 86458200 86463100 High Signal Region +9 87098700 87112200 High Signal Region +9 87481400 87500900 High Signal Region +9 87576700 87594000 High Signal Region +9 87945600 87952400 High Signal Region +9 88011000 88013900 High Signal Region +9 88592100 88829800 High Signal Region +9 89031300 89075400 Low Mappability +9 89321400 89361800 High Signal Region +9 90147100 90149100 High Signal Region +9 90285200 90395300 High Signal Region +9 90455400 90456800 High Signal Region +9 90808100 90821900 Low Mappability +9 90857200 90876300 Low Mappability +9 91222100 91268200 High Signal Region +9 91598800 91647400 High Signal Region +9 92032700 92035300 High Signal Region +9 92075300 92113200 High Signal Region 
+9 92239700 92242900 High Signal Region +9 92624800 92654500 High Signal Region +9 93013300 93035300 High Signal Region +9 93286500 93296500 High Signal Region +9 93360800 93442100 Low Mappability +9 93618000 93668500 Low Mappability +9 94821700 94828100 Low Mappability +9 95245800 95299600 High Signal Region +9 95425000 95426900 High Signal Region +9 95829400 95831300 High Signal Region +9 96104900 96111400 Low Mappability +9 96852000 96854100 High Signal Region +9 98343300 98345700 Low Mappability +9 98451100 98458500 Low Mappability +9 98747700 98771800 Low Mappability +9 99266600 99273100 Low Mappability +9 99735800 99763300 High Signal Region +9 99922800 99937600 High Signal Region +9 100073800 100080700 High Signal Region +9 100516900 100519200 High Signal Region +9 100920400 100922300 High Signal Region +9 101085500 101110600 High Signal Region +9 101292500 101326600 Low Mappability +9 102277400 102283800 Low Mappability +9 102764700 102766800 Low Mappability +9 102812800 102815000 High Signal Region +9 102956300 102970000 Low Mappability +9 103296200 103305600 High Signal Region +9 103352800 103367100 Low Mappability +9 103988500 103990400 High Signal Region +9 104524500 104525700 High Signal Region +9 104848800 104850600 High Signal Region +9 105086200 105119300 High Signal Region +9 105818400 105820400 High Signal Region +9 107207900 107219900 High Signal Region +9 109036600 109083500 High Signal Region +9 109245000 109252200 High Signal Region +9 109272900 109374100 High Signal Region +9 110280300 110306700 High Signal Region +9 110443100 110455100 High Signal Region +9 110970300 110976000 High Signal Region +9 111661900 111668700 High Signal Region +9 112330100 112336900 High Signal Region +9 112956300 112990600 High Signal Region +9 113260500 113262400 High Signal Region +9 113535400 113541300 High Signal Region +9 114101400 114149500 Low Mappability +9 114172400 114322200 High Signal Region +9 114970100 114974700 Low Mappability +9 115077900 115085200 
Low Mappability +9 115349900 115351800 High Signal Region +9 115496100 115498100 Low Mappability +9 116981500 116988600 High Signal Region +9 118088300 118151400 High Signal Region +9 118674000 118675900 High Signal Region +9 119861200 119895000 Low Mappability +9 120265300 120288700 High Signal Region +9 120633900 120641200 Low Mappability +9 121024600 121042700 Low Mappability +9 121178300 121184500 High Signal Region +9 121220100 121247600 High Signal Region +9 121313700 121385800 Low Mappability +9 121406300 121418400 Low Mappability +9 122161300 122163200 High Signal Region +9 122277700 122334500 Low Mappability +9 122401500 122441900 Low Mappability +9 122660600 122667200 Low Mappability +9 122703400 122730400 Low Mappability +9 122903900 122906600 High Signal Region +9 123190700 123197500 Low Mappability +9 123460900 123463100 High Signal Region +9 123742600 123753500 Low Mappability +9 123851700 123929500 High Signal Region +9 123966100 124009300 High Signal Region +9 124161300 124282600 High Signal Region +9 124494100 124595100 High Signal Region +X 3286700 4493800 High Signal Region +X 4524500 5370300 High Signal Region +X 8346400 8348200 High Signal Region +X 8550300 8557800 High Signal Region +X 8818900 8824300 High Signal Region +X 9345800 9395300 High Signal Region +X 9500200 9595700 High Signal Region +X 14739100 14741000 High Signal Region +X 21466500 21472700 High Signal Region +X 21846900 21896100 High Signal Region +X 26459300 26505100 High Signal Region +X 26907100 29639200 High Signal Region +X 29660500 35508900 High Signal Region +X 37612500 37669100 High Signal Region +X 39073800 39075700 High Signal Region +X 41482500 41489500 High Signal Region +X 42676200 42688100 High Signal Region +X 44239900 44293300 High Signal Region +X 44732600 44738600 High Signal Region +X 48699000 48771100 High Signal Region +X 54269300 55286000 High Signal Region +X 55716700 55807400 High Signal Region +X 58475000 58478700 High Signal Region +X 59773000 59796900 
High Signal Region +X 61868200 61874000 High Signal Region +X 62065700 62084900 High Signal Region +X 63509200 63515900 High Signal Region +X 63634600 63640900 High Signal Region +X 64125800 64132200 High Signal Region +X 65962800 65999900 High Signal Region +X 66067900 66084000 High Signal Region +X 66143100 66145700 High Signal Region +X 66316400 66356900 High Signal Region +X 67662500 67708500 High Signal Region +X 70055300 70072000 High Signal Region +X 72800000 72818700 High Signal Region +X 75582400 75709000 High Signal Region +X 76589100 76607100 High Signal Region +X 79135300 79150400 High Signal Region +X 81153100 81154600 High Signal Region +X 82475800 82481000 High Signal Region +X 84290800 84296100 High Signal Region +X 87222400 87262500 High Signal Region +X 87838600 87845200 High Signal Region +X 88230200 88246900 High Signal Region +X 89182800 89232600 High Signal Region +X 89914800 89916600 High Signal Region +X 90308600 90336600 High Signal Region +X 92765200 92767900 High Signal Region +X 94795400 94980600 High Signal Region +X 95265900 95291700 High Signal Region +X 97728000 97734800 High Signal Region +X 98008600 98033000 High Signal Region +X 98585800 98612400 High Signal Region +X 101111300 101113600 High Signal Region +X 102560800 102585100 High Signal Region +X 103455000 103457100 High Signal Region +X 104959400 104966000 High Signal Region +X 105523800 105529900 High Signal Region +X 108202600 108222500 High Signal Region +X 108567500 108585200 High Signal Region +X 109871000 109876200 High Signal Region +X 110976700 110997000 High Signal Region +X 112369800 112402300 High Signal Region +X 114412500 114421300 High Signal Region +X 118100900 118102900 High Signal Region +X 118901200 118905100 Low Mappability +X 119137300 119142400 High Signal Region +X 119247400 119264800 High Signal Region +X 119335000 119339300 High Signal Region +X 120351000 120355400 High Signal Region +X 121511200 121514500 High Signal Region +X 122901700 122908000 High 
Signal Region +X 123686000 124042000 High Signal Region +X 126695300 126778800 High Signal Region +X 127935800 127964600 High Signal Region +X 128512700 128514400 High Signal Region +X 128959800 128965900 High Signal Region +X 129055600 129072400 High Signal Region +X 129429300 129448000 High Signal Region +X 130696000 130702200 High Signal Region +X 131802300 131832800 High Signal Region +X 132024200 132026400 High Signal Region +X 132158700 132160800 High Signal Region +X 134149100 134151200 High Signal Region +X 135040100 135056700 High Signal Region +X 136459400 136503800 High Signal Region +X 136897900 136925800 High Signal Region +X 138302200 138324600 High Signal Region +X 143471300 143484000 High Signal Region +X 144699500 144723900 High Signal Region +X 145709800 145739800 High Signal Region +X 146582500 146588700 High Signal Region +X 146758100 146761900 High Signal Region +X 147619400 147620700 High Signal Region +X 153994800 154073200 High Signal Region +X 154242800 154244800 High Signal Region +X 158443900 158460500 High Signal Region +X 159120000 159154900 High Signal Region +X 161179200 161185600 High Signal Region +X 162381600 162384600 High Signal Region +X 164615100 164622200 High Signal Region +X 166063200 166084500 High Signal Region +X 167213400 167220200 High Signal Region +X 167246000 167252200 High Signal Region +X 169968900 171031200 High Signal Region +Y 0 806800 High Signal Region +Y 924800 1005300 High Signal Region +Y 1276400 1813700 High Signal Region +Y 1834500 1940700 High Signal Region +Y 1973200 1996400 High Signal Region +Y 2017200 2068000 Low Mappability +Y 2104700 2210800 High Signal Region +Y 2280300 2288900 Low Mappability +Y 2471300 3819300 High Signal Region +Y 3880300 4177100 High Signal Region +Y 4249500 4289100 High Signal Region +Y 4432000 4956300 High Signal Region +Y 5062400 5227700 High Signal Region +Y 6376700 6382700 High Signal Region +Y 6530200 6663200 High Signal Region +Y 6760200 6835800 High Signal Region +Y 
6984100 8985400 High Signal Region +Y 10638500 41003800 High Signal Region +Y 41159200 91744600 High Signal Region diff --git a/assets/blacklists/v2.0/ce10-blacklist.v2.bed b/assets/blacklists/v2.0/ce10-blacklist.v2.bed new file mode 100644 index 000000000..99566cec9 --- /dev/null +++ b/assets/blacklists/v2.0/ce10-blacklist.v2.bed @@ -0,0 +1,100 @@ +chrIII 449300 453600 High Signal Region +chrIII 930200 932600 High Signal Region +chrIII 1016500 1021400 Low Mappability +chrIII 1293500 1303900 High Signal Region +chrIII 5352200 5359400 High Signal Region +chrIII 7404700 7452200 High Signal Region +chrIII 7593800 7603300 High Signal Region +chrIII 8861100 8864700 High Signal Region +chrIII 10215600 10228400 High Signal Region +chrIII 13775100 14905900 High Signal Region +chrII 0 1300 High Signal Region +chrII 2569700 2571600 High Signal Region +chrII 3464700 3469600 High Signal Region +chrII 3795600 3798100 High Signal Region +chrII 3993700 3995700 High Signal Region +chrII 4640300 4645900 High Signal Region +chrII 5143800 5147500 High Signal Region +chrII 6504400 6509800 High Signal Region +chrII 8286700 8293600 High Signal Region +chrII 8975300 8977400 High Signal Region +chrII 9631400 9633700 High Signal Region +chrII 10335100 10339700 High Signal Region +chrII 11527100 11530900 High Signal Region +chrII 12842800 12846900 Low Mappability +chrII 13597700 13600700 Low Mappability +chrII 13983900 13987500 Low Mappability +chrII 14323700 14340100 High Signal Region +chrII 14992100 14994300 High Signal Region +chrI 669000 679400 High Signal Region +chrI 931700 935600 High Signal Region +chrI 3170700 3173300 High Signal Region +chrI 3989200 3991600 High Signal Region +chrI 4535300 4549400 High Signal Region +chrI 5151000 5154700 High Signal Region +chrI 10203200 10220100 High Signal Region +chrI 10265700 10277700 High Signal Region +chrI 10945100 10953900 High Signal Region +chrI 15059400 15373800 High Signal Region +chrIV 2821200 2831500 High Signal Region +chrIV 
3205500 3210300 High Signal Region +chrIV 3365800 3368900 Low Mappability +chrIV 4415600 4422900 High Signal Region +chrIV 6357100 6361700 High Signal Region +chrIV 6468100 6470600 Low Mappability +chrIV 6682800 6704100 High Signal Region +chrIV 6709900 6734000 High Signal Region +chrIV 7590900 7600100 High Signal Region +chrIV 8563000 8582700 High Signal Region +chrIV 9045100 9049600 High Signal Region +chrIV 10942600 10951900 High Signal Region +chrIV 11070000 11076700 High Signal Region +chrIV 12313600 12325600 High Signal Region +chrIV 12637100 12639400 Low Mappability +chrIV 13359700 13362800 High Signal Region +chrIV 13548500 13550400 High Signal Region +chrIV 14056400 14059900 High Signal Region +chrIV 14775600 14782300 Low Mappability +chrIV 15408400 15424900 Low Mappability +chrV 264100 268100 High Signal Region +chrV 1103700 1106100 Low Mappability +chrV 1637000 1639900 High Signal Region +chrV 3098000 3100300 High Signal Region +chrV 3434000 3441600 High Signal Region +chrV 5072400 5084800 High Signal Region +chrV 5278300 5286700 High Signal Region +chrV 6171000 6183700 High Signal Region +chrV 6936700 6943800 High Signal Region +chrV 7442400 7445100 High Signal Region +chrV 7912200 7925700 High Signal Region +chrV 7988100 7993900 High Signal Region +chrV 8698600 8715600 High Signal Region +chrV 9423500 9436100 High Signal Region +chrV 10604900 10613200 High Signal Region +chrV 12509100 12511300 High Signal Region +chrV 14765800 14770600 High Signal Region +chrV 15425800 15436300 High Signal Region +chrV 16706300 16710300 High Signal Region +chrV 17114500 17133000 High Signal Region +chrV 17307700 17312400 High Signal Region +chrV 17383300 17395200 Low Mappability +chrV 18399500 18402500 High Signal Region +chrX 108300 115100 High Signal Region +chrX 273800 296200 High Signal Region +chrX 1635300 1645200 High Signal Region +chrX 1747400 1755900 High Signal Region +chrX 3006400 3008800 High Signal Region +chrX 4025200 4056900 High Signal Region +chrX 
5045000 5058100 High Signal Region +chrX 7076500 7081700 High Signal Region +chrX 9184100 9189500 High Signal Region +chrX 9437700 9440000 High Signal Region +chrX 10360200 10369900 High Signal Region +chrX 11784800 11790300 High Signal Region +chrX 11885600 11889600 High Signal Region +chrX 12275900 12280400 High Signal Region +chrX 14384000 14390400 High Signal Region +chrX 14907200 14910200 High Signal Region +chrX 15226100 15229500 High Signal Region +chrX 15806300 15812000 Low Mappability +chrX 16757900 16761600 High Signal Region diff --git a/assets/blacklists/v2.0/ce11-blacklist.v2.bed b/assets/blacklists/v2.0/ce11-blacklist.v2.bed new file mode 100644 index 000000000..66f9d730b --- /dev/null +++ b/assets/blacklists/v2.0/ce11-blacklist.v2.bed @@ -0,0 +1,97 @@ +chrIII 449400 453600 High Signal Region +chrIII 930300 932600 High Signal Region +chrIII 1016500 1021500 Low Mappability +chrIII 1293500 1303900 High Signal Region +chrIII 5352200 5359400 High Signal Region +chrIII 7404700 7452200 High Signal Region +chrIII 7593800 7598100 High Signal Region +chrIII 8861200 8864800 High Signal Region +chrIII 10216000 10228500 High Signal Region +chrIII 13775200 13783800 High Signal Region +chrII 0 1300 High Signal Region +chrII 2569700 2571800 High Signal Region +chrII 3464700 3469600 High Signal Region +chrII 3795600 3798100 High Signal Region +chrII 3993700 3995700 High Signal Region +chrII 4640300 4645900 High Signal Region +chrII 5143800 5147500 High Signal Region +chrII 6504500 6509800 High Signal Region +chrII 8286700 8293600 High Signal Region +chrII 8975300 8977500 High Signal Region +chrII 9631500 9633800 High Signal Region +chrII 10335100 10339700 High Signal Region +chrII 11527200 11530900 High Signal Region +chrII 12842900 12846900 Low Mappability +chrII 13597800 13600800 High Signal Region +chrII 13984000 13987600 Low Mappability +chrII 14323700 14340100 High Signal Region +chrII 14992200 14994400 High Signal Region +chrI 669000 679200 High Signal Region 
+chrI 931700 935600 High Signal Region +chrI 3170700 3173300 High Signal Region +chrI 3989200 3991600 High Signal Region +chrI 4535300 4549400 High Signal Region +chrI 5151000 5154700 High Signal Region +chrI 10203200 10220100 High Signal Region +chrI 10265700 10277700 High Signal Region +chrI 10945100 10953900 High Signal Region +chrI 15059400 15072400 High Signal Region +chrIV 2821100 2831500 High Signal Region +chrIV 3205500 3210300 High Signal Region +chrIV 3365800 3368800 Low Mappability +chrIV 4415600 4422800 High Signal Region +chrIV 6357100 6361700 High Signal Region +chrIV 6468100 6470600 Low Mappability +chrIV 6682700 6704200 High Signal Region +chrIV 7590800 7600100 High Signal Region +chrIV 8563000 8582600 High Signal Region +chrIV 9045100 9049600 High Signal Region +chrIV 10942600 10951900 High Signal Region +chrIV 11070000 11076700 High Signal Region +chrIV 12023000 12025800 High Signal Region +chrIV 12313600 12325600 High Signal Region +chrIV 12637100 12639500 Low Mappability +chrIV 13359700 13362800 High Signal Region +chrIV 13548500 13550500 High Signal Region +chrIV 14056400 14059900 High Signal Region +chrIV 14775600 14782300 Low Mappability +chrIV 15076800 15082900 High Signal Region +chrIV 15408500 15424900 Low Mappability +chrV 264100 268100 High Signal Region +chrV 1103700 1106100 Low Mappability +chrV 1637000 1639900 High Signal Region +chrV 3098000 3100400 High Signal Region +chrV 3434100 3441600 High Signal Region +chrV 5278300 5286800 High Signal Region +chrV 6171000 6183700 High Signal Region +chrV 6936700 6943900 High Signal Region +chrV 7912300 7925700 High Signal Region +chrV 7988100 7993700 High Signal Region +chrV 8698700 8715700 High Signal Region +chrV 9423500 9436100 High Signal Region +chrV 10604900 10613200 High Signal Region +chrV 12509100 12511300 High Signal Region +chrV 14765800 14770600 High Signal Region +chrV 16706300 16710300 High Signal Region +chrV 17114600 17133000 High Signal Region +chrV 17307800 17312400 High 
Signal Region +chrV 17383300 17395200 High Signal Region +chrV 18399500 18402500 High Signal Region +chrX 108300 115100 High Signal Region +chrX 273800 296200 High Signal Region +chrX 1635300 1645200 High Signal Region +chrX 1747400 1755900 High Signal Region +chrX 3006400 3008800 High Signal Region +chrX 4025200 4056900 High Signal Region +chrX 5045000 5058200 High Signal Region +chrX 7076600 7081600 High Signal Region +chrX 9184200 9189600 High Signal Region +chrX 9437700 9440000 High Signal Region +chrX 10360300 10370000 High Signal Region +chrX 11784800 11790400 High Signal Region +chrX 11885700 11889700 High Signal Region +chrX 12275900 12280400 High Signal Region +chrX 14907200 14910300 High Signal Region +chrX 15226200 15229600 High Signal Region +chrX 15806400 15812100 Low Mappability +chrX 16758000 16761600 High Signal Region diff --git a/assets/blacklists/v2.0/dm3-blacklist.v2.bed b/assets/blacklists/v2.0/dm3-blacklist.v2.bed new file mode 100644 index 000000000..45f1a2017 --- /dev/null +++ b/assets/blacklists/v2.0/dm3-blacklist.v2.bed @@ -0,0 +1,271 @@ +chr2LHet 43400 150800 High Signal Region +chr2LHet 350800 368800 Low Mappability +chr2L 47200 52500 High Signal Region +chr2L 66000 74500 High Signal Region +chr2L 154400 167500 High Signal Region +chr2L 221100 223300 High Signal Region +chr2L 471400 491700 High Signal Region +chr2L 2191400 2201000 High Signal Region +chr2L 2749200 2756400 High Signal Region +chr2L 2884100 2889800 High Signal Region +chr2L 3161500 3164300 High Signal Region +chr2L 4937900 4941000 High Signal Region +chr2L 5206500 5210500 High Signal Region +chr2L 5943200 5949200 High Signal Region +chr2L 5976600 5987500 High Signal Region +chr2L 6991400 6998500 Low Mappability +chr2L 7343400 7350800 High Signal Region +chr2L 9898700 9903100 High Signal Region +chr2L 9973800 9980900 Low Mappability +chr2L 10333600 10335600 High Signal Region +chr2L 11992000 12013100 High Signal Region +chr2L 12558300 12565400 Low Mappability +chr2L 
13522100 13527800 Low Mappability +chr2L 14489600 14491600 High Signal Region +chr2L 16267500 16271800 High Signal Region +chr2L 16283800 16289200 High Signal Region +chr2L 16512100 16526900 Low Mappability +chr2L 18942900 18945500 High Signal Region +chr2L 19570700 19588100 High Signal Region +chr2L 20647200 20649100 High Signal Region +chr2L 21019900 21024300 Low Mappability +chr2L 21236700 21238800 High Signal Region +chr2L 21416300 21544000 High Signal Region +chr2L 22378300 22389200 High Signal Region +chr2L 22657900 22670900 High Signal Region +chr2RHet 674900 692100 Low Mappability +chr2RHet 1142300 1263300 Low Mappability +chr2RHet 1422500 1435900 High Signal Region +chr2RHet 2823000 2830800 High Signal Region +chr2RHet 2924700 2989000 High Signal Region +chr2RHet 3179900 3183800 High Signal Region +chr2RHet 3269500 3288700 Low Mappability +chr2R 101000 118400 High Signal Region +chr2R 201300 207900 High Signal Region +chr2R 934100 944600 High Signal Region +chr2R 992900 997600 High Signal Region +chr2R 2217200 2303300 High Signal Region +chr2R 2548500 2551600 High Signal Region +chr2R 3123000 3137600 High Signal Region +chr2R 3322500 3326700 Low Mappability +chr2R 3495700 3501600 Low Mappability +chr2R 3692900 3720700 High Signal Region +chr2R 3902800 3905600 Low Mappability +chr2R 4552800 4555900 High Signal Region +chr2R 5367700 5378100 Low Mappability +chr2R 5430900 5442200 High Signal Region +chr2R 5615200 5621600 High Signal Region +chr2R 6311300 6318700 High Signal Region +chr2R 6364800 6368500 High Signal Region +chr2R 6420800 6430300 High Signal Region +chr2R 6835600 6843000 High Signal Region +chr2R 7538100 7540600 High Signal Region +chr2R 8473600 8481800 High Signal Region +chr2R 8706700 8712000 Low Mappability +chr2R 8867500 8873800 High Signal Region +chr2R 8883000 8885600 High Signal Region +chr2R 9981000 9997000 Low Mappability +chr2R 10076400 10082900 High Signal Region +chr2R 10776800 10785300 Low Mappability +chr2R 11985200 11992700 High 
Signal Region +chr2R 13034800 13040100 Low Mappability +chr2R 13344900 13346800 High Signal Region +chr2R 13569800 13571700 High Signal Region +chr2R 14243000 14260400 High Signal Region +chr2R 14463000 14469700 Low Mappability +chr2R 14745600 14747800 Low Mappability +chr2R 15616900 15653700 High Signal Region +chr2R 15663700 15667500 Low Mappability +chr2R 16667200 16675500 Low Mappability +chr2R 16882700 16885400 High Signal Region +chr2R 17038700 17049200 High Signal Region +chr2R 17532800 17535100 High Signal Region +chr2R 18413300 18417100 Low Mappability +chr2R 19865400 19867600 High Signal Region +chr2R 20897600 20900500 High Signal Region +chr2R 21144900 21146700 Low Mappability +chr3LHet 1345100 1347600 High Signal Region +chr3LHet 2162700 2187800 High Signal Region +chr3LHet 2202300 2209300 Low Mappability +chr3LHet 2244300 2253000 Low Mappability +chr3L 130400 133100 High Signal Region +chr3L 223200 226000 High Signal Region +chr3L 270300 272600 High Signal Region +chr3L 318400 320800 High Signal Region +chr3L 539500 544400 High Signal Region +chr3L 748200 750200 High Signal Region +chr3L 1334000 1336300 High Signal Region +chr3L 1545400 1547500 High Signal Region +chr3L 1567700 1570700 High Signal Region +chr3L 1793900 1796200 High Signal Region +chr3L 2063400 2069800 High Signal Region +chr3L 2585300 2590700 High Signal Region +chr3L 3059100 3071600 High Signal Region +chr3L 3147900 3150300 High Signal Region +chr3L 3218400 3227200 High Signal Region +chr3L 3896400 3905700 High Signal Region +chr3L 4133100 4139800 High Signal Region +chr3L 5104000 5117900 Low Mappability +chr3L 7349700 7355100 High Signal Region +chr3L 7662100 7684600 High Signal Region +chr3L 7972100 7981100 Low Mappability +chr3L 8012400 8017600 High Signal Region +chr3L 8784200 8790000 Low Mappability +chr3L 9385600 9395900 High Signal Region +chr3L 9415500 9424500 High Signal Region +chr3L 9569100 9574400 Low Mappability +chr3L 10729500 10731200 High Signal Region +chr3L 11238800 
11246200 High Signal Region +chr3L 11479900 11481800 High Signal Region +chr3L 11545300 11547300 High Signal Region +chr3L 11605800 11612700 High Signal Region +chr3L 11955900 11966000 High Signal Region +chr3L 14747500 14755700 High Signal Region +chr3L 14773500 14784000 High Signal Region +chr3L 15134000 15138400 High Signal Region +chr3L 15818200 15820000 High Signal Region +chr3L 16038000 16046600 High Signal Region +chr3L 16639900 16641900 High Signal Region +chr3L 16653500 16655400 High Signal Region +chr3L 17003800 17006300 High Signal Region +chr3L 17636700 17638700 High Signal Region +chr3L 18827000 18834000 High Signal Region +chr3L 19887800 19909000 High Signal Region +chr3L 20391900 20395600 High Signal Region +chr3L 20466800 20477400 High Signal Region +chr3L 20510900 20513100 High Signal Region +chr3L 20808600 20818200 High Signal Region +chr3L 21000400 21022000 High Signal Region +chr3L 21358800 21371300 Low Mappability +chr3L 22092600 22099100 Low Mappability +chr3L 22734700 22738400 Low Mappability +chr3L 22754300 22760100 Low Mappability +chr3L 23817900 23833100 High Signal Region +chr3L 23935400 23941300 Low Mappability +chr3L 24447200 24484400 Low Mappability +chr3RHet 43700 62000 High Signal Region +chr3RHet 1342700 1348400 Low Mappability +chr3RHet 1508900 1524800 Low Mappability +chr3RHet 1816700 1834500 High Signal Region +chr3RHet 1947200 1958800 High Signal Region +chr3RHet 2307400 2309300 Low Mappability +chr3R 228600 238600 Low Mappability +chr3R 509000 510400 High Signal Region +chr3R 564200 566200 High Signal Region +chr3R 777000 779000 High Signal Region +chr3R 828100 833700 High Signal Region +chr3R 871500 878800 Low Mappability +chr3R 911600 914500 High Signal Region +chr3R 1076200 1078200 High Signal Region +chr3R 1424600 1427600 High Signal Region +chr3R 1448500 1451200 High Signal Region +chr3R 2230300 2234000 High Signal Region +chr3R 2645100 2649200 High Signal Region +chr3R 2899200 2916800 High Signal Region +chr3R 2933000 
2935200 High Signal Region +chr3R 3176400 3180900 High Signal Region +chr3R 3917000 3932800 Low Mappability +chr3R 4396100 4400900 Low Mappability +chr3R 4872100 4884300 High Signal Region +chr3R 5242900 5245900 High Signal Region +chr3R 5335200 5343100 High Signal Region +chr3R 5376200 5378000 High Signal Region +chr3R 5415800 5418000 High Signal Region +chr3R 5454700 5457000 High Signal Region +chr3R 5510100 5537700 High Signal Region +chr3R 5697900 5701200 High Signal Region +chr3R 6080700 6091100 Low Mappability +chr3R 6167400 6182800 High Signal Region +chr3R 6207000 6215100 High Signal Region +chr3R 7583800 7590800 High Signal Region +chr3R 7779600 7786900 High Signal Region +chr3R 8228800 8230500 High Signal Region +chr3R 8290300 8337000 High Signal Region +chr3R 8452400 8454700 High Signal Region +chr3R 9509100 9511200 High Signal Region +chr3R 10109000 10113200 High Signal Region +chr3R 10548800 10550700 High Signal Region +chr3R 10920100 10922000 High Signal Region +chr3R 10956400 10966900 High Signal Region +chr3R 11112800 11118600 High Signal Region +chr3R 11798100 11800500 High Signal Region +chr3R 12054400 12069200 High Signal Region +chr3R 12074500 12081400 High Signal Region +chr3R 12813500 12821900 High Signal Region +chr3R 13506900 13509400 High Signal Region +chr3R 13542200 13544300 High Signal Region +chr3R 13751200 13753400 Low Mappability +chr3R 14022000 14025900 High Signal Region +chr3R 14962500 14964700 Low Mappability +chr3R 17121500 17134300 High Signal Region +chr3R 17173800 17176100 High Signal Region +chr3R 17430600 17445700 Low Mappability +chr3R 17456100 17459600 High Signal Region +chr3R 18275900 18279300 High Signal Region +chr3R 19358600 19360500 High Signal Region +chr3R 19383900 19386200 Low Mappability +chr3R 19715100 19717300 High Signal Region +chr3R 19902300 19904500 Low Mappability +chr3R 19929900 19934800 High Signal Region +chr3R 20407300 20409000 High Signal Region +chr3R 20874800 20877200 High Signal Region +chr3R 
22922500 22923900 High Signal Region +chr3R 22966200 22977000 High Signal Region +chr3R 23406600 23408200 High Signal Region +chr3R 23551000 23556900 Low Mappability +chr3R 23682500 23694400 Low Mappability +chr3R 23894000 23899900 Low Mappability +chr3R 24151200 24153300 High Signal Region +chr3R 24889000 24891200 High Signal Region +chr3R 25563700 25565800 Low Mappability +chr3R 25910200 25912400 Low Mappability +chr3R 26900000 26906600 High Signal Region +chr3R 27041200 27048400 High Signal Region +chr3R 27238900 27243600 High Signal Region +chr3R 27433400 27437700 High Signal Region +chr3R 27572500 27575000 High Signal Region +chr3R 27893200 27905000 High Signal Region +chr4 97100 102500 Low Mappability +chr4 1278500 1351800 High Signal Region +chrXHet 32400 43100 Low Mappability +chrXHet 87400 132300 High Signal Region +chrX 0 18800 High Signal Region +chrX 322300 328700 Low Mappability +chrX 1251600 1275300 High Signal Region +chrX 2012900 2033200 High Signal Region +chrX 2504400 2514500 Low Mappability +chrX 2683600 2687100 High Signal Region +chrX 2964000 2975200 Low Mappability +chrX 3308900 3315500 Low Mappability +chrX 3620500 3624500 Low Mappability +chrX 3684200 3699100 High Signal Region +chrX 3834600 3844200 High Signal Region +chrX 4812700 4831100 High Signal Region +chrX 4884100 4891200 High Signal Region +chrX 6065700 6073000 High Signal Region +chrX 7019400 7028400 High Signal Region +chrX 7374800 7376400 High Signal Region +chrX 7791300 7793200 High Signal Region +chrX 7949800 7957700 High Signal Region +chrX 8186900 8190800 High Signal Region +chrX 8821300 8824300 Low Mappability +chrX 9517700 9520200 High Signal Region +chrX 10657000 10663600 High Signal Region +chrX 10990100 10997200 Low Mappability +chrX 11206200 11212900 High Signal Region +chrX 11473900 11494400 High Signal Region +chrX 11527500 11542700 Low Mappability +chrX 11607000 11609700 High Signal Region +chrX 12824600 12831700 Low Mappability +chrX 15705000 15706900 High Signal 
Region +chrX 15907200 15953600 High Signal Region +chrX 18323300 18329000 Low Mappability +chrX 18676100 18682200 High Signal Region +chrX 18757000 18759500 High Signal Region +chrX 19247000 19252800 Low Mappability +chrX 20070100 20101800 High Signal Region +chrX 21611900 21630800 High Signal Region +chrX 21833900 21835900 High Signal Region +chrX 21924100 21927700 Low Mappability +chrYHet 125600 138300 Low Mappability +chrYHet 195300 225100 High Signal Region diff --git a/assets/blacklists/v2.0/dm6-blacklist.v2.bed b/assets/blacklists/v2.0/dm6-blacklist.v2.bed new file mode 100644 index 000000000..65a220b82 --- /dev/null +++ b/assets/blacklists/v2.0/dm6-blacklist.v2.bed @@ -0,0 +1,182 @@ +chr2L 154500 167500 High Signal Region +chr2L 348000 365800 High Signal Region +chr2L 471400 482200 High Signal Region +chr2L 2191400 2200900 High Signal Region +chr2L 2749200 2756400 High Signal Region +chr2L 3161500 3164200 High Signal Region +chr2L 4536500 4544800 High Signal Region +chr2L 4938100 4941000 High Signal Region +chr2L 5206500 5210500 High Signal Region +chr2L 5827800 5836700 High Signal Region +chr2L 5976700 5987500 High Signal Region +chr2L 7343400 7349300 High Signal Region +chr2L 9898700 9902900 High Signal Region +chr2L 11316200 11317600 High Signal Region +chr2L 11992200 12013200 High Signal Region +chr2L 16267500 16271800 High Signal Region +chr2L 16283900 16289200 High Signal Region +chr2L 18942900 18945600 High Signal Region +chr2L 20647200 20649100 High Signal Region +chr2L 21236600 21238800 High Signal Region +chr2L 21415900 21544200 High Signal Region +chr2L 21653300 21656300 High Signal Region +chr2L 22409400 22479400 High Signal Region +chr2L 22488500 22506800 High Signal Region +chr2L 22765200 22909700 High Signal Region +chr2L 23096400 23122300 High Signal Region +chr2L 23353200 23387900 High Signal Region +chr2L 23511900 23513700 High Signal Region +chr2R 0 14900 High Signal Region +chr2R 744200 878700 Low Mappability +chr2R 1492900 1530200 High 
Signal Region +chr2R 1818200 1840700 Low Mappability +chr2R 1931800 1949300 Low Mappability +chr2R 2158800 2169900 High Signal Region +chr2R 2218200 2238300 High Signal Region +chr2R 2652000 2665400 High Signal Region +chr2R 3601600 3603200 High Signal Region +chr2R 3718500 3775000 High Signal Region +chr2R 3943700 3998100 High Signal Region +chr2R 4274200 4275400 High Signal Region +chr2R 4863000 4884000 Low Mappability +chr2R 5046600 5057100 High Signal Region +chr2R 5105500 5110000 High Signal Region +chr2R 7235700 7250000 High Signal Region +chr2R 8015400 8018000 Low Mappability +chr2R 10177000 10186000 High Signal Region +chr2R 10948100 10955500 High Signal Region +chr2R 12586100 12594400 High Signal Region +chr2R 14188900 14195500 High Signal Region +chr2R 18575400 18582400 High Signal Region +chr2R 19715900 19766800 High Signal Region +chr2R 21151200 21153700 High Signal Region +chr2R 24177300 24184700 High Signal Region +chr2R 25257500 25286800 High Signal Region +chr3L 2063300 2069900 High Signal Region +chr3L 2447600 2456600 High Signal Region +chr3L 3899000 3903000 High Signal Region +chr3L 7669000 7691700 High Signal Region +chr3L 7978900 7987800 Low Mappability +chr3L 8019300 8024500 High Signal Region +chr3L 11968300 11972800 High Signal Region +chr3L 16596900 16607900 High Signal Region +chr3L 18833900 18840800 High Signal Region +chr3L 20473700 20484300 High Signal Region +chr3L 20815500 20825000 High Signal Region +chr3L 22099400 22106200 Low Mappability +chr3L 22761300 22767100 High Signal Region +chr3L 23111800 23118300 High Signal Region +chr3L 23825700 23839600 High Signal Region +chr3L 24384500 24445600 High Signal Region +chr3L 24576600 24669400 High Signal Region +chr3L 25051000 25054100 High Signal Region +chr3L 25129300 25135900 High Signal Region +chr3L 25962100 25964900 High Signal Region +chr3L 26877500 27082600 High Signal Region +chr3L 27137300 27140300 Low Mappability +chr3L 27471600 27649900 High Signal Region +chr3R 0 32600 High 
Signal Region +chr3R 43000 82600 High Signal Region +chr3R 236900 285000 High Signal Region +chr3R 499300 529400 High Signal Region +chr3R 1271100 1279000 Low Mappability +chr3R 1369500 1390500 Low Mappability +chr3R 2619300 2623900 High Signal Region +chr3R 2749700 2768300 High Signal Region +chr3R 2775800 2782000 Low Mappability +chr3R 3032500 3058100 High Signal Region +chr3R 3087400 3136500 High Signal Region +chr3R 3168900 3171300 Low Mappability +chr3R 3697900 3702100 High Signal Region +chr3R 4738400 4740500 High Signal Region +chr3R 4951200 4953300 High Signal Region +chr3R 5002300 5009200 High Signal Region +chr3R 5045500 5053200 Low Mappability +chr3R 5085900 5088400 High Signal Region +chr3R 5598800 5602000 High Signal Region +chr3R 5622700 5625500 High Signal Region +chr3R 6404600 6408200 High Signal Region +chr3R 6819500 6823400 High Signal Region +chr3R 7073500 7076300 High Signal Region +chr3R 7107300 7109400 High Signal Region +chr3R 8091300 8107100 Low Mappability +chr3R 8570200 8575500 Low Mappability +chr3R 9046400 9058600 High Signal Region +chr3R 9351800 9354100 High Signal Region +chr3R 9417100 9420200 High Signal Region +chr3R 9509400 9517300 High Signal Region +chr3R 9550400 9552400 High Signal Region +chr3R 9590000 9592300 High Signal Region +chr3R 9629000 9631300 High Signal Region +chr3R 9684500 9712000 High Signal Region +chr3R 10255100 10265400 Low Mappability +chr3R 10341700 10357100 High Signal Region +chr3R 10377600 10389400 High Signal Region +chr3R 11758100 11765200 High Signal Region +chr3R 11948900 11961000 High Signal Region +chr3R 12464500 12511200 High Signal Region +chr3R 12626600 12629000 High Signal Region +chr3R 13683300 13685500 High Signal Region +chr3R 14283200 14287500 High Signal Region +chr3R 14696300 14698100 High Signal Region +chr3R 14723000 14725100 High Signal Region +chr3R 15094300 15096400 High Signal Region +chr3R 15130700 15135000 High Signal Region +chr3R 15286000 15292900 High Signal Region +chr3R 15972300 
15974800 High Signal Region +chr3R 16224000 16243500 High Signal Region +chr3R 16248800 16255800 High Signal Region +chr3R 16987900 16996000 High Signal Region +chr3R 17681200 17683700 High Signal Region +chr3R 17716400 17718700 High Signal Region +chr3R 17925400 17927800 High Signal Region +chr3R 18196300 18200300 High Signal Region +chr3R 21295600 21308400 High Signal Region +chr3R 21348100 21350400 High Signal Region +chr3R 21604900 21620100 Low Mappability +chr3R 21630200 21633900 High Signal Region +chr3R 22450200 22453500 High Signal Region +chr3R 23889300 23891600 High Signal Region +chr3R 24076500 24078900 Low Mappability +chr3R 24104200 24109100 High Signal Region +chr3R 24581500 24590000 High Signal Region +chr3R 25049000 25051400 High Signal Region +chr3R 27140500 27151400 High Signal Region +chr3R 27580600 27582500 High Signal Region +chr3R 27856800 27868700 Low Mappability +chr3R 28325400 28327600 High Signal Region +chr3R 29063200 29065500 High Signal Region +chr3R 29737800 29740200 Low Mappability +chr3R 30084400 30086800 Low Mappability +chr3R 31215500 31222700 High Signal Region +chr3R 31413100 31417800 High Signal Region +chr3R 31607700 31611900 High Signal Region +chr3R 31746800 31749300 High Signal Region +chr3R 32067500 32079300 Low Mappability +chr4 1274800 1348100 High Signal Region +chrX 0 122400 High Signal Region +chrX 201200 246300 High Signal Region +chrX 2610300 2617700 High Signal Region +chrX 4921800 4937000 High Signal Region +chrX 4990100 4997200 High Signal Region +chrX 7125300 7134300 High Signal Region +chrX 8292900 8296800 High Signal Region +chrX 11487700 11494000 High Signal Region +chrX 16013200 16059600 High Signal Region +chrX 19907800 19958400 High Signal Region +chrX 22257600 22401900 High Signal Region +chrX 22432100 22434100 High Signal Region +chrX 22996400 23003500 High Signal Region +chrX 23019600 23022700 Low Mappability +chrX 23204900 23285000 High Signal Region +chrX 23290700 23442900 High Signal Region +chrX 
23450200 23465000 Low Mappability +chrX 23471400 23489900 Low Mappability +chrX 23512700 23539400 Low Mappability +chrY 113900 125600 High Signal Region +chrY 131500 155700 High Signal Region +chrY 199800 248700 High Signal Region +chrY 313600 325400 High Signal Region +chrY 641400 654100 Low Mappability +chrY 1456900 1693500 High Signal Region +chrY 3641100 3667300 High Signal Region diff --git a/assets/blacklists/v2.0/hg19-blacklist.v2.bed b/assets/blacklists/v2.0/hg19-blacklist.v2.bed new file mode 100644 index 000000000..03688d91e --- /dev/null +++ b/assets/blacklists/v2.0/hg19-blacklist.v2.bed @@ -0,0 +1,834 @@ +chr10 38726200 42489100 High Signal Region +chr10 42524900 42819200 High Signal Region +chr10 98560400 98562500 High Signal Region +chr10 135437600 135534700 High Signal Region +chr11 0 196300 High Signal Region +chr11 584400 586500 High Signal Region +chr11 964000 966100 Low Mappability +chr11 1015700 1019100 High Signal Region +chr11 1088800 1094300 High Signal Region +chr11 1141100 1214300 High Signal Region +chr11 3674100 3676900 Low Mappability +chr11 6830800 6832700 High Signal Region +chr11 10528500 10532700 Low Mappability +chr11 11267200 11269500 High Signal Region +chr11 48700000 48964800 High Signal Region +chr11 50505600 50523400 High Signal Region +chr11 50635500 51200100 High Signal Region +chr11 51244400 51289000 High Signal Region +chr11 51566300 54834600 High Signal Region +chr11 54876800 55028400 High Signal Region +chr11 62606300 62651300 High Signal Region +chr11 77596600 77601800 High Signal Region +chr11 85172700 85196400 High Signal Region +chr11 93965500 93984500 High Signal Region +chr11 100156600 100162500 High Signal Region +chr11 102239800 102246000 High Signal Region +chr11 129208700 129234600 High Signal Region +chr12 0 187000 High Signal Region +chr12 479900 531700 High Signal Region +chr12 2364000 2366100 High Signal Region +chr12 2628700 2649700 Low Mappability +chr12 4618500 4624000 High Signal Region +chr12 6037400 
6042400 Low Mappability +chr12 7705200 7717600 High Signal Region +chr12 19881600 19887000 High Signal Region +chr12 20703400 20705400 High Signal Region +chr12 20921400 20928000 High Signal Region +chr12 34371700 34400000 High Signal Region +chr12 34574500 34576400 Low Mappability +chr12 34761600 37887400 High Signal Region +chr12 37989200 38259900 High Signal Region +chr12 38330900 38375800 Low Mappability +chr12 38443400 38503500 High Signal Region +chr12 38534900 38537700 High Signal Region +chr12 41756500 41758400 Low Mappability +chr12 54205000 54206900 High Signal Region +chr12 66867700 66872600 High Signal Region +chr12 69385000 69391000 High Signal Region +chr12 70167100 70204100 High Signal Region +chr12 75903800 75916900 High Signal Region +chr12 93771900 93808100 High Signal Region +chr12 97117400 97122300 High Signal Region +chr12 101540100 101549000 High Signal Region +chr12 113517400 113519300 High Signal Region +chr12 125394300 125426400 Low Mappability +chr12 126072900 126074800 Low Mappability +chr12 127649500 127651900 High Signal Region +chr12 130863600 130878600 High Signal Region +chr12 132060500 132074200 High Signal Region +chr12 133343000 133345000 High Signal Region +chr12 133825400 133851800 Low Mappability +chr13 0 19194200 High Signal Region +chr13 19344200 19447900 High Signal Region +chr13 19641600 19652000 High Signal Region +chr13 19677800 19683400 High Signal Region +chr13 19711000 19713300 High Signal Region +chr13 20051500 20077000 Low Mappability +chr13 20150200 20228600 High Signal Region +chr13 20352400 20372400 High Signal Region +chr13 20966500 20984700 High Signal Region +chr13 21068500 21072900 High Signal Region +chr13 21816000 21826300 High Signal Region +chr13 21950600 21952600 High Signal Region +chr13 22125500 22129800 High Signal Region +chr13 22429700 22436000 High Signal Region +chr13 23095400 23108300 Low Mappability +chr13 24900500 24932100 High Signal Region +chr13 25122300 25128600 High Signal Region +chr13 
26467000 26472000 Low Mappability +chr13 27977100 28035000 High Signal Region +chr13 28710500 28733700 High Signal Region +chr13 29767400 29769600 High Signal Region +chr13 30215700 30247400 Low Mappability +chr13 30397900 30426100 High Signal Region +chr13 30787000 30790100 High Signal Region +chr13 30819100 30845000 High Signal Region +chr13 31412800 31440600 High Signal Region +chr13 31521900 31523400 High Signal Region +chr13 31916200 31920700 High Signal Region +chr13 31970100 31971800 High Signal Region +chr13 33109900 33114000 High Signal Region +chr13 33149400 33182100 High Signal Region +chr13 33441700 33443500 Low Mappability +chr13 34163100 34164900 High Signal Region +chr13 34558900 34565000 High Signal Region +chr13 35054300 35073100 High Signal Region +chr13 35656000 35664300 High Signal Region +chr13 35977500 36001800 High Signal Region +chr13 36531200 36553700 High Signal Region +chr13 36582200 36588400 High Signal Region +chr13 37723900 37730200 High Signal Region +chr13 38396200 38402300 Low Mappability +chr13 38640900 38645800 High Signal Region +chr13 38687300 38721000 High Signal Region +chr13 40422400 40427800 High Signal Region +chr13 40560400 40580700 Low Mappability +chr13 40920400 40936600 Low Mappability +chr13 41309000 41315200 Low Mappability +chr13 41343800 41416000 High Signal Region +chr13 41438500 41477300 High Signal Region +chr13 41530500 41640400 High Signal Region +chr13 42108700 42114800 High Signal Region +chr13 42165400 42243300 High Signal Region +chr13 42321000 42324400 High Signal Region +chr13 42445300 42448800 High Signal Region +chr13 42479700 42497900 High Signal Region +chr13 42928200 42961000 Low Mappability +chr13 42999000 43005200 Low Mappability +chr13 43128800 43132300 High Signal Region +chr13 43734900 43740400 High Signal Region +chr13 44391900 44409800 Low Mappability +chr13 44540800 44550400 High Signal Region +chr13 45491200 45494100 High Signal Region +chr13 46190900 46244300 High Signal Region +chr13 
47322400 47347500 Low Mappability +chr13 47795600 47799800 Low Mappability +chr13 48288000 48379400 High Signal Region +chr13 48551900 48636200 High Signal Region +chr13 48776900 48781200 High Signal Region +chr13 48955700 49045800 High Signal Region +chr13 49587600 49593700 High Signal Region +chr13 49726300 49750700 High Signal Region +chr13 50655600 50674000 High Signal Region +chr13 50739500 50760400 High Signal Region +chr13 50804000 50831700 High Signal Region +chr13 51045000 51047900 Low Mappability +chr13 51069400 51148800 High Signal Region +chr13 51538700 51562300 High Signal Region +chr13 51643200 51654900 High Signal Region +chr13 52056600 52177400 High Signal Region +chr13 52209900 52311100 High Signal Region +chr13 52628400 52634200 Low Mappability +chr13 52767000 52908600 High Signal Region +chr13 53056600 53198500 High Signal Region +chr13 53667700 53672500 Low Mappability +chr13 54170800 54195600 High Signal Region +chr13 55314400 55316200 Low Mappability +chr13 55924900 55928900 Low Mappability +chr13 56386000 56387700 Low Mappability +chr13 57149500 57152400 High Signal Region +chr13 57613800 57615800 Low Mappability +chr13 57713200 57748000 High Signal Region +chr13 57793400 57794800 High Signal Region +chr13 57929500 57933400 Low Mappability +chr13 58055700 58068300 High Signal Region +chr13 58756200 58759000 High Signal Region +chr13 59246600 59252600 Low Mappability +chr13 60399100 60401600 Low Mappability +chr13 60558600 60561900 Low Mappability +chr13 60819900 60825800 High Signal Region +chr13 60868000 60870400 Low Mappability +chr13 61508300 61510400 High Signal Region +chr13 62142000 62143900 Low Mappability +chr13 62379800 62381800 High Signal Region +chr13 62407700 62419700 High Signal Region +chr13 63602300 63649700 High Signal Region +chr13 64291000 64343600 Low Mappability +chr13 64395200 64410800 Low Mappability +chr13 66567000 66569000 Low Mappability +chr13 66827800 66833700 High Signal Region +chr13 67311600 67317700 Low 
Mappability +chr13 67350200 67352500 High Signal Region +chr13 68136600 68139600 Low Mappability +chr13 68254200 68260100 High Signal Region +chr13 68566600 68570000 High Signal Region +chr13 68901600 68915400 High Signal Region +chr13 70357600 70362900 Low Mappability +chr13 70783800 70789000 High Signal Region +chr13 71751300 71752700 High Signal Region +chr13 71958500 71963800 High Signal Region +chr13 72799900 72802600 High Signal Region +chr13 73184400 73190500 High Signal Region +chr13 74027000 74033300 High Signal Region +chr13 74202200 74220800 High Signal Region +chr13 74809900 74816100 High Signal Region +chr13 75111000 75116700 High Signal Region +chr13 75606300 75608100 Low Mappability +chr13 75653600 75655800 High Signal Region +chr13 75815200 75821400 High Signal Region +chr13 76251800 76322500 High Signal Region +chr13 76528000 76532300 High Signal Region +chr13 76841900 76843700 High Signal Region +chr13 77119100 77122400 Low Mappability +chr13 77179200 77192700 Low Mappability +chr13 77773200 77779300 High Signal Region +chr13 78250500 78260900 Low Mappability +chr13 78453800 78455300 High Signal Region +chr13 78857200 78859700 High Signal Region +chr13 79087100 79105000 High Signal Region +chr13 79590600 79592200 High Signal Region +chr13 79809800 79811600 High Signal Region +chr13 80391100 80420000 High Signal Region +chr13 80726100 80730600 Low Mappability +chr13 81490500 81492700 Low Mappability +chr13 81638000 81651500 High Signal Region +chr13 82132700 82135500 Low Mappability +chr13 82322400 82327400 High Signal Region +chr13 82619500 82625600 Low Mappability +chr13 82805900 82809300 High Signal Region +chr13 83315600 83317200 Low Mappability +chr13 84095600 84097700 High Signal Region +chr13 84535300 84540600 High Signal Region +chr13 85075900 85078300 Low Mappability +chr13 85299500 85302300 High Signal Region +chr13 85695400 85703500 High Signal Region +chr13 86143500 86147200 Low Mappability +chr13 86485900 86502200 Low Mappability 
+chr13 86572000 86573600 High Signal Region +chr13 87297900 87303700 High Signal Region +chr13 88351600 88353900 High Signal Region +chr13 89335000 89337000 Low Mappability +chr13 89482500 89486800 Low Mappability +chr13 89740300 89746300 Low Mappability +chr13 91181200 91182800 Low Mappability +chr13 91305800 91322300 Low Mappability +chr13 92256000 92259400 High Signal Region +chr13 92622900 92628600 High Signal Region +chr13 93127200 93129400 Low Mappability +chr13 93170900 93175000 High Signal Region +chr13 94140200 94148600 High Signal Region +chr13 95024400 95030600 Low Mappability +chr13 95471000 95472500 Low Mappability +chr13 95561600 95563600 High Signal Region +chr13 96217900 96220200 High Signal Region +chr13 96377900 96393100 High Signal Region +chr13 96481000 96493700 High Signal Region +chr13 96556500 96572100 High Signal Region +chr13 96616700 96633300 Low Mappability +chr13 96699300 96705200 High Signal Region +chr13 97807500 97812400 High Signal Region +chr13 97873500 98016000 High Signal Region +chr13 98083600 98086200 High Signal Region +chr13 98256400 98266100 Low Mappability +chr13 99386700 99407200 High Signal Region +chr13 100970400 100973100 High Signal Region +chr13 101327900 101356500 Low Mappability +chr13 102191500 102196900 High Signal Region +chr13 102250800 102254200 High Signal Region +chr13 102293700 102296000 High Signal Region +chr13 102560800 102562600 High Signal Region +chr13 103174700 103180600 High Signal Region +chr13 103770000 103772400 High Signal Region +chr13 104155400 104159600 High Signal Region +chr13 105306100 105307700 Low Mappability +chr13 105609500 105613300 High Signal Region +chr13 105951400 105953800 Low Mappability +chr13 106035100 106040800 Low Mappability +chr13 106536800 106542400 High Signal Region +chr13 106651900 106669100 High Signal Region +chr13 106866200 106872100 High Signal Region +chr13 107430500 107436700 High Signal Region +chr13 108868800 108909300 Low Mappability +chr13 109162700 109168900 
High Signal Region +chr13 110075500 110098100 Low Mappability +chr13 110691900 110705300 High Signal Region +chr13 111036900 111039000 High Signal Region +chr13 111107500 111163000 High Signal Region +chr13 111512100 111527200 High Signal Region +chr13 111959100 111964000 High Signal Region +chr13 111992000 111994500 Low Mappability +chr13 112148400 112153300 High Signal Region +chr13 112628400 112630400 High Signal Region +chr13 112668600 112670300 High Signal Region +chr13 112931200 112973400 High Signal Region +chr13 113179900 113244400 High Signal Region +chr13 113319400 113321900 High Signal Region +chr13 113440500 113444300 High Signal Region +chr13 113526200 113540600 High Signal Region +chr13 113765500 113767700 High Signal Region +chr13 113916300 113951000 Low Mappability +chr13 114089500 114102600 High Signal Region +chr13 114191600 114218700 High Signal Region +chr13 114247100 114280800 High Signal Region +chr13 114452900 114520000 High Signal Region +chr13 114553300 114571100 High Signal Region +chr13 114601800 114772500 High Signal Region +chr13 114848900 114852600 High Signal Region +chr14 0 20303800 High Signal Region +chr14 27098600 27104100 High Signal Region +chr14 32263100 32280800 High Signal Region +chr14 32350600 32352500 High Signal Region +chr14 32934800 32955200 Low Mappability +chr14 35006400 35031800 Low Mappability +chr14 36416700 36419200 High Signal Region +chr14 39980200 39995800 High Signal Region +chr14 54700600 54706600 High Signal Region +chr14 67508000 67534600 High Signal Region +chr14 80556900 80561000 High Signal Region +chr14 86540300 86577300 High Signal Region +chr14 87058300 87078200 High Signal Region +chr14 87879900 87894500 High Signal Region +chr14 88236600 88243300 High Signal Region +chr14 90340400 90342300 High Signal Region +chr14 102140800 102142700 High Signal Region +chr14 105681400 105707200 High Signal Region +chr14 106034900 106185200 High Signal Region +chr14 107151000 107176900 High Signal Region +chr15 0 
20166200 High Signal Region +chr15 20200400 22365100 High Signal Region +chr15 22387400 22749100 Low Mappability +chr15 23266700 23612800 High Signal Region +chr15 26002700 26004600 Low Mappability +chr15 28538400 28956300 High Signal Region +chr15 30358500 30919300 High Signal Region +chr15 31136500 31143800 Low Mappability +chr15 32445900 32915200 High Signal Region +chr15 56603300 56608500 High Signal Region +chr15 69255900 69257800 High Signal Region +chr15 72085400 72090500 High Signal Region +chr15 72923800 72979000 Low Mappability +chr15 74357800 74398000 High Signal Region +chr15 75546200 75592100 High Signal Region +chr15 77991000 77993000 High Signal Region +chr15 82582300 83213900 High Signal Region +chr15 84835000 85142500 High Signal Region +chr15 85732700 85814600 High Signal Region +chr15 102283600 102305300 Low Mappability +chr15 102411600 102531300 High Signal Region +chr16 32923000 33427100 High Signal Region +chr16 33726300 34197900 High Signal Region +chr16 35191100 46501300 High Signal Region +chr16 90155800 90354700 Low Mappability +chr17 66700 167600 High Signal Region +chr17 964700 969400 High Signal Region +chr17 1210900 1236400 Low Mappability +chr17 4734800 4736700 Low Mappability +chr17 18928600 19140800 High Signal Region +chr17 21492100 21686000 High Signal Region +chr17 21901400 21908600 High Signal Region +chr17 22019700 22024900 High Signal Region +chr17 22207000 25341300 High Signal Region +chr17 30264500 30277600 High Signal Region +chr17 31148500 31150800 High Signal Region +chr17 33477200 33479300 High Signal Region +chr17 34476000 34812200 Low Mappability +chr17 36253600 36406900 High Signal Region +chr17 41378900 41402100 High Signal Region +chr17 41432200 41467700 High Signal Region +chr17 43588700 43718700 High Signal Region +chr17 45108700 45130400 Low Mappability +chr17 45211900 45283300 High Signal Region +chr17 45612500 45671300 Low Mappability +chr17 51182300 51184600 Low Mappability +chr17 64794200 64796200 Low 
Mappability +chr17 78717100 78719200 Low Mappability +chr17 81151700 81195200 Low Mappability +chr18 0 127000 High Signal Region +chr18 952000 976900 High Signal Region +chr18 2247200 2253300 High Signal Region +chr18 2841300 2866100 Low Mappability +chr18 6687500 6705800 High Signal Region +chr18 12134400 12227800 High Signal Region +chr18 14163200 14270800 High Signal Region +chr18 15139700 15271400 High Signal Region +chr18 15293900 18552900 High Signal Region +chr18 19792100 19813600 High Signal Region +chr18 20109800 20115600 High Signal Region +chr18 20388600 20400600 High Signal Region +chr18 27088800 27090300 High Signal Region +chr18 28927900 28933700 High Signal Region +chr18 30436500 30442100 High Signal Region +chr18 32114600 32137900 High Signal Region +chr18 32924100 32938700 High Signal Region +chr18 33196300 33213600 High Signal Region +chr18 33342300 33346200 High Signal Region +chr18 38424600 38428200 High Signal Region +chr18 42024800 42028200 High Signal Region +chr18 42607900 42611000 High Signal Region +chr18 44125300 44127400 High Signal Region +chr18 44503000 44515000 High Signal Region +chr18 44541400 44558200 High Signal Region +chr18 45378700 45380700 Low Mappability +chr18 46175800 46204100 High Signal Region +chr18 46572200 46634900 High Signal Region +chr18 47297100 47302900 High Signal Region +chr18 50318200 50320200 High Signal Region +chr18 52710600 52712900 High Signal Region +chr18 53382700 53388400 High Signal Region +chr18 54391800 54393600 High Signal Region +chr18 60853700 60886800 High Signal Region +chr18 61530100 61533700 High Signal Region +chr18 68386500 68419400 High Signal Region +chr18 74678000 74695900 High Signal Region +chr18 76196600 76198900 Low Mappability +chr18 76272400 76275200 High Signal Region +chr18 76773800 76800400 High Signal Region +chr18 77031200 77124400 High Signal Region +chr18 77233300 77236000 High Signal Region +chr18 77377700 77394500 High Signal Region +chr18 77679100 77681700 High Signal 
Region +chr18 77772000 77796400 High Signal Region +chr19 7514300 7516900 High Signal Region +chr19 8850900 8910700 High Signal Region +chr19 24182700 24198600 High Signal Region +chr19 24501500 27995100 High Signal Region +chr19 35349800 35357000 High Signal Region +chr19 36065600 36067700 High Signal Region +chr19 37756400 37795100 High Signal Region +chr19 44912700 44921200 High Signal Region +chr19 44958200 44964700 Low Mappability +chr19 48406200 48463100 High Signal Region +chr19 50593500 50643700 High Signal Region +chr1 0 750100 High Signal Region +chr1 814500 845200 High Signal Region +chr1 2052400 2056000 High Signal Region +chr1 2582800 2693900 High Signal Region +chr1 4362200 4364300 High Signal Region +chr1 5714800 5736800 High Signal Region +chr1 16821600 17301500 High Signal Region +chr1 38076400 38078300 Low Mappability +chr1 91836500 91854100 High Signal Region +chr1 120531600 120896300 High Signal Region +chr1 120926100 121149300 High Signal Region +chr1 121341500 145396500 High Signal Region +chr1 147424800 147731700 High Signal Region +chr1 147832000 149058800 High Signal Region +chr1 152185700 152191100 High Signal Region +chr1 156185300 156187600 High Signal Region +chr1 161392300 161442700 High Signal Region +chr1 168317300 168322800 High Signal Region +chr1 203888700 203890700 High Signal Region +chr1 224175500 224213600 High Signal Region +chr1 228743700 228782800 High Signal Region +chr1 236876100 236879100 Low Mappability +chr1 237765500 237767500 High Signal Region +chr1 246980600 246982700 High Signal Region +chr1 249225300 249250600 High Signal Region +chr20 25733100 25945000 Low Mappability +chr20 25984200 26150000 Low Mappability +chr20 26184300 29519700 High Signal Region +chr20 29546800 29853000 High Signal Region +chr20 46521400 46531600 High Signal Region +chr20 47130700 47133900 High Signal Region +chr20 62887700 63025500 Low Mappability +chr21 9594900 10366000 High Signal Region +chr21 10491900 10494000 Low Mappability +chr21 
10646000 10861500 High Signal Region +chr21 11004200 14370200 High Signal Region +chr22 0 16962100 High Signal Region +chr22 17348200 17393500 Low Mappability +chr22 17494600 17519200 Low Mappability +chr22 18358700 18361200 High Signal Region +chr22 18657400 18889800 High Signal Region +chr22 20304800 20708400 High Signal Region +chr22 21466100 21916600 High Signal Region +chr22 23826900 23829900 Low Mappability +chr22 33517600 33519500 High Signal Region +chr22 36280800 36282700 Low Mappability +chr22 51058600 51083400 High Signal Region +chr22 51220000 51304500 High Signal Region +chr2 2298400 2300500 Low Mappability +chr2 3183400 3185800 High Signal Region +chr2 13858600 13877800 High Signal Region +chr2 33140400 33143500 High Signal Region +chr2 49455800 49457900 Low Mappability +chr2 62956900 62981700 High Signal Region +chr2 70656600 70659500 High Signal Region +chr2 86882000 86896800 High Signal Region +chr2 87441300 88290400 High Signal Region +chr2 89534800 89985900 High Signal Region +chr2 90267000 95326200 High Signal Region +chr2 95471500 95565900 Low Mappability +chr2 97718400 98232300 High Signal Region +chr2 109814800 109817200 High Signal Region +chr2 114147600 114441900 High Signal Region +chr2 132763200 132836700 Low Mappability +chr2 132946300 133122100 High Signal Region +chr2 149638400 149640300 Low Mappability +chr2 162134100 162148700 High Signal Region +chr2 230044500 230046500 High Signal Region +chr2 243052100 243199300 High Signal Region +chr3 612200 662600 Low Mappability +chr3 3762200 3767100 High Signal Region +chr3 4958300 4964200 High Signal Region +chr3 8414500 8434100 High Signal Region +chr3 15009100 15010800 Low Mappability +chr3 15228200 15245300 High Signal Region +chr3 16995500 17013700 Low Mappability +chr3 25740700 25759100 Low Mappability +chr3 26426200 26445700 High Signal Region +chr3 39913800 39931100 High Signal Region +chr3 43527700 43530900 Low Mappability +chr3 51490400 51496100 High Signal Region +chr3 63719200 
63725000 High Signal Region +chr3 73159000 73161500 High Signal Region +chr3 75678100 75917700 High Signal Region +chr3 75982800 75999500 High Signal Region +chr3 78995600 78999800 High Signal Region +chr3 80490200 80492100 Low Mappability +chr3 80916400 80946200 High Signal Region +chr3 90205400 90224700 High Signal Region +chr3 90312300 93518500 High Signal Region +chr3 93957200 93959600 Low Mappability +chr3 96335200 96338100 Low Mappability +chr3 96457300 96459000 High Signal Region +chr3 98184700 98186900 High Signal Region +chr3 100827300 100833600 Low Mappability +chr3 107053900 107058800 Low Mappability +chr3 118633600 118639100 Low Mappability +chr3 135154400 135158200 High Signal Region +chr3 135304100 135329200 High Signal Region +chr3 139309800 139333000 High Signal Region +chr3 155996600 156002700 High Signal Region +chr3 157599200 157620600 High Signal Region +chr3 160658900 160666400 Low Mappability +chr3 169397300 169454100 High Signal Region +chr3 173977500 173983300 Low Mappability +chr3 175499400 175504900 High Signal Region +chr3 182734900 182736900 Low Mappability +chr3 183673200 183676500 High Signal Region +chr3 183796800 183798700 High Signal Region +chr3 185265900 185305500 Low Mappability +chr3 189237500 189238900 Low Mappability +chr3 195201400 195233900 Low Mappability +chr3 195341900 195476900 High Signal Region +chr3 195502200 195519800 High Signal Region +chr3 195640700 195745500 High Signal Region +chr3 196624700 196639200 High Signal Region +chr3 196757600 196762600 High Signal Region +chr3 197110400 197187600 High Signal Region +chr3 197325200 197407700 High Signal Region +chr3 197798000 198022400 High Signal Region +chr4 0 69600 High Signal Region +chr4 1420500 1478600 High Signal Region +chr4 9199300 9371400 High Signal Region +chr4 40293300 40341800 High Signal Region +chr4 49073900 52683800 High Signal Region +chr4 68263300 68273300 High Signal Region +chr4 70294400 70297700 High Signal Region +chr4 76806200 76808200 High 
Signal Region +chr4 80272500 80275600 High Signal Region +chr4 114909000 114911500 High Signal Region +chr4 120158500 120222800 High Signal Region +chr4 153843200 153846400 High Signal Region +chr4 167475600 167502500 Low Mappability +chr4 190153700 190157200 High Signal Region +chr4 190190600 190230500 High Signal Region +chr4 190469400 190685100 High Signal Region +chr4 190756300 190770700 High Signal Region +chr4 190795300 191154200 High Signal Region +chr5 0 85500 High Signal Region +chr5 629900 651800 High Signal Region +chr5 1326400 1334600 High Signal Region +chr5 2144800 2147800 High Signal Region +chr5 2490000 2491700 High Signal Region +chr5 3322200 3325200 High Signal Region +chr5 6967500 6971800 High Signal Region +chr5 14633500 14653400 Low Mappability +chr5 16335700 16341500 High Signal Region +chr5 17516900 17600400 High Signal Region +chr5 17631100 17633300 Low Mappability +chr5 21458600 21581100 High Signal Region +chr5 25360400 25384600 High Signal Region +chr5 32369500 32391600 High Signal Region +chr5 34177800 34246500 High Signal Region +chr5 45523000 45550600 High Signal Region +chr5 45932400 45978600 High Signal Region +chr5 46072400 46096800 High Signal Region +chr5 46239900 46241800 Low Mappability +chr5 46265500 49594200 High Signal Region +chr5 60055500 60058300 Low Mappability +chr5 68830000 70669400 High Signal Region +chr5 71145800 71149800 High Signal Region +chr5 73981300 74008300 High Signal Region +chr5 79945000 79949100 High Signal Region +chr5 80324700 80351700 High Signal Region +chr5 84936300 84958500 High Signal Region +chr5 90445100 90458900 High Signal Region +chr5 93283200 93284600 High Signal Region +chr5 93903700 93906100 Low Mappability +chr5 99381200 99426800 High Signal Region +chr5 113477000 113496900 High Signal Region +chr5 126439200 126461500 High Signal Region +chr5 130208300 130210400 High Signal Region +chr5 134258200 134265100 High Signal Region +chr5 136835200 136886000 High Signal Region +chr5 137304800 
137310300 High Signal Region +chr5 138341100 138347500 High Signal Region +chr5 142677200 142690000 Low Mappability +chr5 143013900 143015800 High Signal Region +chr5 155138700 155189100 High Signal Region +chr5 156085200 156093100 High Signal Region +chr5 170510900 170517200 High Signal Region +chr5 173440700 173444600 High Signal Region +chr5 174540800 174565800 High Signal Region +chr5 175331400 175545200 High Signal Region +chr5 176017900 176019800 Low Mappability +chr5 177061900 177360500 High Signal Region +chr5 177387600 177408100 Low Mappability +chr5 178011600 178013600 High Signal Region +chr5 180599700 180915200 High Signal Region +chr6 0 162100 Low Mappability +chr6 256600 382800 High Signal Region +chr6 519000 521400 Low Mappability +chr6 851500 864200 High Signal Region +chr6 1428700 1434800 Low Mappability +chr6 2200300 2202700 Low Mappability +chr6 4809700 4840300 Low Mappability +chr6 5886400 5892500 Low Mappability +chr6 6141100 6143900 Low Mappability +chr6 6212500 6217900 Low Mappability +chr6 8770600 8776400 Low Mappability +chr6 9966100 9971700 High Signal Region +chr6 10984500 10987800 Low Mappability +chr6 14480600 14486400 Low Mappability +chr6 15189400 15190800 High Signal Region +chr6 20079900 20093100 Low Mappability +chr6 20615000 20619200 Low Mappability +chr6 22166500 22181200 Low Mappability +chr6 23232900 23235900 Low Mappability +chr6 26668800 26830200 High Signal Region +chr6 26850500 26925900 Low Mappability +chr6 30027900 30071800 Low Mappability +chr6 31783300 31806300 Low Mappability +chr6 33451700 33454300 High Signal Region +chr6 34038400 34041700 High Signal Region +chr6 37096000 37117300 High Signal Region +chr6 38241900 38269500 High Signal Region +chr6 44011400 44047700 High Signal Region +chr6 44148500 44150800 High Signal Region +chr6 45637100 45683300 Low Mappability +chr6 45814800 45817800 Low Mappability +chr6 45963800 45965300 Low Mappability +chr6 48331100 48336800 Low Mappability +chr6 48705800 48711100 Low 
Mappability +chr6 49759100 49764900 Low Mappability +chr6 50999100 51004700 High Signal Region +chr6 51531300 51535800 Low Mappability +chr6 54270500 54273400 High Signal Region +chr6 54364700 54372500 Low Mappability +chr6 54826700 54832300 Low Mappability +chr6 56911200 56913200 Low Mappability +chr6 56954700 56956700 Low Mappability +chr6 57133300 57608800 High Signal Region +chr6 57671300 57673300 Low Mappability +chr6 58061300 58288100 High Signal Region +chr6 58724800 58738300 Low Mappability +chr6 58772700 61920700 High Signal Region +chr6 62283100 62285000 Low Mappability +chr6 62371500 62383900 Low Mappability +chr6 62770600 62781900 High Signal Region +chr6 63265300 63298700 Low Mappability +chr6 65966100 65967700 Low Mappability +chr6 70193400 70231500 Low Mappability +chr6 71454100 71514000 Low Mappability +chr6 71981600 71986300 High Signal Region +chr6 72027300 72029200 Low Mappability +chr6 72875000 72876900 High Signal Region +chr6 73680200 73704400 Low Mappability +chr6 74417700 74420300 Low Mappability +chr6 74707400 74738700 Low Mappability +chr6 77455300 77457000 Low Mappability +chr6 77670600 77687700 Low Mappability +chr6 77752900 77797700 Low Mappability +chr6 78426700 78455800 Low Mappability +chr6 78508100 78509800 Low Mappability +chr6 79681400 79687300 Low Mappability +chr6 80401000 80403400 High Signal Region +chr6 81193300 81207500 Low Mappability +chr6 83257400 83275700 Low Mappability +chr6 86694600 86736600 Low Mappability +chr6 87552300 87637100 Low Mappability +chr6 89091200 89122300 Low Mappability +chr6 90764500 90769800 High Signal Region +chr6 91272000 91298000 High Signal Region +chr6 94341300 94347000 High Signal Region +chr6 95516600 95540000 Low Mappability +chr6 96310100 96313200 Low Mappability +chr6 97430400 97437100 Low Mappability +chr6 97824400 97828600 High Signal Region +chr6 99151500 99156200 Low Mappability +chr6 99314300 99316400 Low Mappability +chr6 100802600 100817600 High Signal Region +chr6 101028300 
101034500 Low Mappability +chr6 101633800 101663000 Low Mappability +chr6 102617900 102623600 High Signal Region +chr6 102983200 102985100 Low Mappability +chr6 103200700 103206700 High Signal Region +chr6 104937300 104943400 Low Mappability +chr6 105185700 105210800 Low Mappability +chr6 107045300 107046900 Low Mappability +chr6 109454700 109471400 Low Mappability +chr6 109566300 109571600 Low Mappability +chr6 112224100 112229600 Low Mappability +chr6 112853400 112873000 Low Mappability +chr6 114754200 114756900 Low Mappability +chr6 115121100 115123800 Low Mappability +chr6 115496600 115502400 Low Mappability +chr6 115575100 115578000 High Signal Region +chr6 116960800 116966000 High Signal Region +chr6 117134700 117144000 Low Mappability +chr6 117413300 117429300 Low Mappability +chr6 119557600 119559600 High Signal Region +chr6 121732200 121734100 Low Mappability +chr6 121887100 121892400 Low Mappability +chr6 123793600 123799300 Low Mappability +chr6 125028000 125052900 High Signal Region +chr6 125126000 125131800 Low Mappability +chr6 129226700 129244600 Low Mappability +chr6 131556000 131561800 Low Mappability +chr6 132019100 132037100 Low Mappability +chr6 132177400 132179000 Low Mappability +chr6 133341700 133347800 Low Mappability +chr6 133593100 133595000 High Signal Region +chr6 136492700 136494600 Low Mappability +chr6 138120400 138136600 Low Mappability +chr6 142456500 142469200 Low Mappability +chr6 144117700 144122900 High Signal Region +chr6 145393200 145395000 Low Mappability +chr6 145824200 145826400 Low Mappability +chr6 145984700 146002900 Low Mappability +chr6 146291400 146318300 Low Mappability +chr6 148276600 148278600 Low Mappability +chr6 148480500 148484700 Low Mappability +chr6 150782100 150797500 Low Mappability +chr6 156062900 156064800 High Signal Region +chr6 156355300 156361300 High Signal Region +chr6 156646100 156651900 High Signal Region +chr6 156803000 156804800 Low Mappability +chr6 157730500 157736300 High Signal Region +chr6 
160073000 160134300 Low Mappability +chr6 161032400 161068500 Low Mappability +chr6 165716800 165720000 Low Mappability +chr6 165782200 165787800 Low Mappability +chr6 166828700 166843000 Low Mappability +chr6 167196600 167208400 Low Mappability +chr6 167745800 167752500 Low Mappability +chr6 167786100 167802900 Low Mappability +chr6 168635100 168638700 Low Mappability +chr6 168961200 168963300 Low Mappability +chr6 169054200 169061300 High Signal Region +chr6 169239700 169241700 Low Mappability +chr6 170460500 170462500 Low Mappability +chr6 170528700 170531000 Low Mappability +chr6 170686000 170710200 High Signal Region +chr6 170774700 170777400 Low Mappability +chr6 170803900 170839700 Low Mappability +chr6 170915300 171115000 Low Mappability +chr7 0 49700 High Signal Region +chr7 1311000 1313200 High Signal Region +chr7 45290700 45292600 Low Mappability +chr7 56437000 56447500 High Signal Region +chr7 57544900 57557600 High Signal Region +chr7 57597800 57782700 High Signal Region +chr7 57884200 62120800 High Signal Region +chr7 62403000 62404900 High Signal Region +chr7 64929600 65063200 High Signal Region +chr7 84878700 84884900 High Signal Region +chr7 100549000 100611600 High Signal Region +chr7 100634800 100648100 High Signal Region +chr7 101981900 102013400 High Signal Region +chr7 102114900 102445700 High Signal Region +chr7 121919200 121925000 High Signal Region +chr7 140761800 140784200 High Signal Region +chr7 142373000 142376300 Low Mappability +chr7 145693500 145735200 Low Mappability +chr7 152072600 152132400 High Signal Region +chr7 157924100 157945100 High Signal Region +chr7 158387000 158388900 High Signal Region +chr7 158685900 158710600 High Signal Region +chr8 0 185300 High Signal Region +chr8 7012600 8066200 High Signal Region +chr8 11994400 12230100 High Signal Region +chr8 12252000 12466300 High Signal Region +chr8 13501500 13503800 High Signal Region +chr8 43091800 43118200 High Signal Region +chr8 43758900 46908900 High Signal Region 
+chr8 46946900 46959100 High Signal Region +chr8 47367600 47369500 High Signal Region +chr8 48792700 48794600 High Signal Region +chr8 51581600 51584700 High Signal Region +chr8 52729900 52737900 High Signal Region +chr8 58117400 58128700 High Signal Region +chr8 59283300 59288700 High Signal Region +chr8 60782300 60800500 High Signal Region +chr8 70600600 70603500 High Signal Region +chr8 82753700 82764200 High Signal Region +chr8 86554300 86841600 High Signal Region +chr8 100501000 100509100 High Signal Region +chr8 104795400 104807700 High Signal Region +chr8 106801200 106807000 High Signal Region +chr8 127325400 127331100 High Signal Region +chr8 142501600 142503600 High Signal Region +chr8 144743300 144752700 High Signal Region +chr9 6593800 6595700 High Signal Region +chr9 35903000 35915300 High Signal Region +chr9 40815000 43489000 High Signal Region +chr9 43684600 44102400 High Signal Region +chr9 44852100 44881200 High Signal Region +chr9 44908300 66250200 High Signal Region +chr9 66344100 68143800 High Signal Region +chr9 68306800 69121200 High Signal Region +chr9 69141700 70957900 High Signal Region +chr9 72652100 72654500 High Signal Region +chr9 78789200 78791100 High Signal Region +chr9 79185700 79187900 High Signal Region +chr9 87779800 87780900 High Signal Region +chr9 140221300 140223800 High Signal Region +chr9 141053300 141213400 Low Mappability +chrX 0 290100 High Signal Region +chrX 392200 529200 Low Mappability +chrX 1006400 1334000 High Signal Region +chrX 7505600 7509900 High Signal Region +chrX 9371800 9400200 High Signal Region +chrX 49164900 49386300 High Signal Region +chrX 55207100 55210900 Low Mappability +chrX 58329700 58433500 High Signal Region +chrX 58461000 61920100 High Signal Region +chrX 62005100 62007000 High Signal Region +chrX 78057800 78060000 High Signal Region +chrX 99512200 99516600 High Signal Region +chrX 101446100 101744100 High Signal Region +chrX 108258600 108312300 High Signal Region +chrX 111555900 111595100 High 
Signal Region +chrX 114959100 115006100 High Signal Region +chrX 125595300 125608200 Low Mappability +chrX 132242600 132250600 High Signal Region +chrX 134852300 134971100 High Signal Region +chrX 136518800 136521500 High Signal Region +chrX 154528900 154616300 High Signal Region +chrX 155038500 155270500 High Signal Region +chrY 7432700 13491000 High Signal Region +chrY 13633400 14289000 High Signal Region +chrY 28783400 59373500 High Signal Region diff --git a/assets/blacklists/v2.0/hg38-blacklist.v2.bed b/assets/blacklists/v2.0/hg38-blacklist.v2.bed new file mode 100644 index 000000000..3852ac0c5 --- /dev/null +++ b/assets/blacklists/v2.0/hg38-blacklist.v2.bed @@ -0,0 +1,636 @@ +chr10 0 45700 Low Mappability +chr10 38481300 38596500 High Signal Region +chr10 38782600 38967900 High Signal Region +chr10 39901300 41712900 High Signal Region +chr10 41838900 42107300 High Signal Region +chr10 42279400 42322500 High Signal Region +chr10 126946300 126953400 Low Mappability +chr10 133625800 133797400 High Signal Region +chr11 0 194500 Low Mappability +chr11 518900 520700 Low Mappability +chr11 584400 586500 High Signal Region +chr11 964100 966000 Low Mappability +chr11 1015700 1019300 High Signal Region +chr11 1091000 1098200 Low Mappability +chr11 3652800 3655600 High Signal Region +chr11 10506900 10511100 High Signal Region +chr11 28206300 28236700 High Signal Region +chr11 50813600 54383000 High Signal Region +chr11 61084500 61130400 High Signal Region +chr11 70370400 70372400 High Signal Region +chr11 73509800 73511700 High Signal Region +chr11 77885600 77887600 High Signal Region +chr11 93417500 93427700 High Signal Region +chr11 94232700 94240400 High Signal Region +chr11 103408700 103410600 High Signal Region +chr11 121175000 121187000 High Signal Region +chr11 131679500 131681500 High Signal Region +chr11 135075600 135086600 High Signal Region +chr12 0 77800 High Signal Region +chr12 371800 422400 High Signal Region +chr12 2254900 2257000 High Signal Region 
+chr12 2519800 2540500 Low Mappability +chr12 5928900 5933000 Low Mappability +chr12 20550500 20552400 Low Mappability +chr12 20768400 20770300 High Signal Region +chr12 29790400 29834600 High Signal Region +chr12 34715400 37269100 High Signal Region +chr12 41362700 41364600 High Signal Region +chr12 61471100 61473000 High Signal Region +chr12 66473900 66475800 High Signal Region +chr12 101147000 101155000 High Signal Region +chr12 113079600 113081500 High Signal Region +chr12 124430500 124440300 High Signal Region +chr12 124905900 124941800 High Signal Region +chr12 130386400 130394100 High Signal Region +chr12 131475300 131478600 High Signal Region +chr12 131576000 131589700 High Signal Region +chr12 132223300 132243400 High Signal Region +chr12 132455100 132465200 High Signal Region +chr12 133249000 133275300 High Signal Region +chr13 16087600 16165300 High Signal Region +chr13 16226300 18171400 High Signal Region +chr13 18211000 18216100 High Signal Region +chr13 57140500 57172500 High Signal Region +chr13 109423200 109425200 High Signal Region +chr13 114353300 114364300 Low Mappability +chr14 0 18670900 High Signal Region +chr14 18695400 19724300 High Signal Region +chr14 23033300 23098600 High Signal Region +chr14 26629300 26634900 High Signal Region +chr14 31793800 31798100 High Signal Region +chr14 32483400 32486000 High Signal Region +chr14 34537100 34562600 High Signal Region +chr14 35947200 35950000 High Signal Region +chr14 37351000 37356700 High Signal Region +chr14 44025100 44027200 High Signal Region +chr14 44705100 44709900 High Signal Region +chr14 45477100 45482500 High Signal Region +chr14 46865300 46866500 High Signal Region +chr14 54235600 54240000 High Signal Region +chr14 57112100 57118100 High Signal Region +chr14 74711700 74729000 High Signal Region +chr14 86074000 86076000 High Signal Region +chr14 86593300 86595200 High Signal Region +chr14 88443700 88458100 High Signal Region +chr14 100525900 100527800 High Signal Region +chr14 101267600 
101272200 High Signal Region +chr14 101674400 101676400 High Signal Region +chr14 104288100 104290200 High Signal Region +chr14 105215000 105240900 High Signal Region +chr14 105568500 105583900 High Signal Region +chr14 105616500 105618600 High Signal Region +chr14 106326900 106367700 High Signal Region +chr15 0 17035000 High Signal Region +chr15 17058500 19790100 High Signal Region +chr15 20005600 22606300 High Signal Region +chr15 23125400 23357400 High Signal Region +chr15 25757700 25759100 Low Mappability +chr15 28304900 28683400 High Signal Region +chr15 30066300 30627500 High Signal Region +chr15 30844100 30859900 High Signal Region +chr15 32153700 32626200 High Signal Region +chr15 54925700 54932200 High Signal Region +chr15 56311200 56314600 High Signal Region +chr15 72635200 72687100 High Signal Region +chr15 74068100 74102000 High Signal Region +chr15 75254100 75299800 High Signal Region +chr15 77698600 77700600 High Signal Region +chr15 82321000 82374600 High Signal Region +chr15 82421200 82541700 High Signal Region +chr15 84405300 84524700 High Signal Region +chr15 101752300 101764800 Low Mappability +chr15 101892700 101991100 High Signal Region +chr16 29430800 29566900 Low Mappability +chr16 34061400 34121400 High Signal Region +chr16 34272000 34633100 High Signal Region +chr16 34657200 34672500 High Signal Region +chr16 34694600 34772000 High Signal Region +chr16 34832600 34922100 High Signal Region +chr16 34945600 35072500 Low Mappability +chr16 36166300 36202400 High Signal Region +chr16 36225200 46423000 High Signal Region +chr16 46449700 46467000 High Signal Region +chr16 90100500 90338300 Low Mappability +chr17 0 137600 High Signal Region +chr17 294900 317900 High Signal Region +chr17 448200 510900 High Signal Region +chr17 1061500 1066100 High Signal Region +chr17 1307700 1312000 Low Mappability +chr17 19025700 19237400 High Signal Region +chr17 21783300 22054000 High Signal Region +chr17 22520400 22527300 High Signal Region +chr17 22745200 
26629800 High Signal Region +chr17 26766800 26987200 High Signal Region +chr17 43227600 43324300 High Signal Region +chr17 45511500 45641300 Low Mappability +chr17 53104900 53107300 High Signal Region +chr18 0 64600 High Signal Region +chr18 105200 113200 High Signal Region +chr18 971000 976500 High Signal Region +chr18 2841300 2861500 High Signal Region +chr18 15367200 20940300 High Signal Region +chr18 46961600 47031700 High Signal Region +chr18 47852300 47854300 Low Mappability +chr18 52791800 52793800 High Signal Region +chr18 74615900 74618100 High Signal Region +chr18 76966200 76968500 High Signal Region +chr18 78436900 78438700 Low Mappability +chr18 79013800 79040300 High Signal Region +chr18 79617800 79621500 High Signal Region +chr18 80257400 80373200 High Signal Region +chr19 0 271200 High Signal Region +chr19 7019100 7061300 High Signal Region +chr19 7449400 7452000 High Signal Region +chr19 8740100 8800500 High Signal Region +chr19 24330100 27274500 High Signal Region +chr19 27337600 27427400 High Signal Region +chr19 34386800 34393500 High Signal Region +chr19 34860600 34866200 High Signal Region +chr19 36267900 36313700 High Signal Region +chr19 37264900 37304300 High Signal Region +chr19 44393300 44416700 High Signal Region +chr19 47903000 47959700 High Signal Region +chr19 50090500 50140400 High Signal Region +chr19 58538700 58617600 High Signal Region +chr1 0 792500 High Signal Region +chr1 91386300 91388400 Low Mappability +chr1 103594400 103760600 High Signal Region +chr1 121605200 124938900 High Signal Region +chr1 125067600 125086000 High Signal Region +chr1 125130200 143562200 High Signal Region +chr1 161423100 161472400 High Signal Region +chr1 168348600 168349900 High Signal Region +chr1 224010800 224017000 High Signal Region +chr1 236713000 236715600 Low Mappability +chr1 248932700 248956400 High Signal Region +chr20 0 67900 High Signal Region +chr20 26364200 28916900 High Signal Region +chr20 28939400 29264700 High Signal Region +chr20 
30995400 31246000 High Signal Region +chr20 47893800 47900200 High Signal Region +chr21 0 8679600 High Signal Region +chr21 9159900 9735300 High Signal Region +chr21 10013900 10069600 High Signal Region +chr21 10094700 10505100 High Signal Region +chr21 10650900 12965800 High Signal Region +chr21 43212400 43280900 High Signal Region +chr21 46682700 46709900 High Signal Region +chr22 10687700 11428100 High Signal Region +chr22 11496900 11873100 High Signal Region +chr22 11976900 15154400 High Signal Region +chr22 16258000 16385800 High Signal Region +chr22 18175900 18947300 High Signal Region +chr22 20337400 20343300 High Signal Region +chr22 21113500 21554000 High Signal Region +chr22 49972700 49975300 High Signal Region +chr22 50642800 50644900 High Signal Region +chr22 50786600 50818400 High Signal Region +chr2 1221700 1223900 High Signal Region +chr2 1594700 1605200 High Signal Region +chr2 3179600 3182100 High Signal Region +chr2 4643800 4648800 High Signal Region +chr2 10952800 10955000 High Signal Region +chr2 13718700 13737700 High Signal Region +chr2 21903500 21906400 High Signal Region +chr2 32865900 32869900 High Signal Region +chr2 32915300 32918400 High Signal Region +chr2 33766500 33768400 High Signal Region +chr2 36183000 36184500 High Signal Region +chr2 49228700 49230700 High Signal Region +chr2 64359300 64377000 High Signal Region +chr2 86655300 86661100 High Signal Region +chr2 86900700 87078100 Low Mappability +chr2 87119300 87189800 Low Mappability +chr2 87217000 87866200 High Signal Region +chr2 88771000 88806500 High Signal Region +chr2 89235300 89947100 High Signal Region +chr2 90246300 91735500 High Signal Region +chr2 91783000 91924800 Low Mappability +chr2 91969000 94569500 High Signal Region +chr2 95849400 96067900 High Signal Region +chr2 97106300 97615800 High Signal Region +chr2 109198400 109200700 High Signal Region +chr2 109744600 110095200 High Signal Region +chr2 110229200 110633400 Low Mappability +chr2 111253600 111500500 Low 
Mappability +chr2 112346200 112441300 Low Mappability +chr2 113370100 113662700 High Signal Region +chr2 130496800 130716400 High Signal Region +chr2 132201000 132288900 High Signal Region +chr2 132353600 132364500 High Signal Region +chr2 148880800 148882800 High Signal Region +chr2 161277700 161283400 High Signal Region +chr2 181274800 181276800 High Signal Region +chr2 226108500 226110400 High Signal Region +chr2 234889800 234894400 High Signal Region +chr2 239642200 239645600 High Signal Region +chr2 240308100 240310300 High Signal Region +chr2 241589300 241591800 High Signal Region +chr2 242005900 242011100 High Signal Region +chr2 242110100 242193500 High Signal Region +chr3 0 11600 High Signal Region +chr3 3895200 3896700 High Signal Region +chr3 4916700 4922500 High Signal Region +chr3 14091000 14092500 High Signal Region +chr3 15187200 15207800 High Signal Region +chr3 15592100 15603300 High Signal Region +chr3 16176800 16179200 High Signal Region +chr3 16679700 16682500 High Signal Region +chr3 19499700 19504000 High Signal Region +chr3 19624000 19627100 High Signal Region +chr3 21983200 21988100 High Signal Region +chr3 24053500 24054900 High Signal Region +chr3 26384800 26404100 High Signal Region +chr3 29993900 29999900 High Signal Region +chr3 36987500 36995000 High Signal Region +chr3 38083400 38085400 High Signal Region +chr3 38406100 38430900 High Signal Region +chr3 39366700 39386000 High Signal Region +chr3 40219400 40240500 High Signal Region +chr3 49671000 49696700 High Signal Region +chr3 51457800 51462000 High Signal Region +chr3 57326800 57328500 High Signal Region +chr3 65124100 65126100 High Signal Region +chr3 65510000 65513900 High Signal Region +chr3 65697400 65699300 High Signal Region +chr3 66273800 66275200 High Signal Region +chr3 68076400 68077800 High Signal Region +chr3 69047300 69053600 High Signal Region +chr3 69475300 69479700 High Signal Region +chr3 75630100 75707800 High Signal Region +chr3 75736400 75754600 High Signal 
Region +chr3 78948800 78950500 High Signal Region +chr3 80876000 80894000 High Signal Region +chr3 89345600 89370500 High Signal Region +chr3 90156400 90175500 High Signal Region +chr3 90455400 91297100 High Signal Region +chr3 91516200 93749200 High Signal Region +chr3 96616300 96619300 High Signal Region +chr3 97905100 97923200 High Signal Region +chr3 101674800 101698400 High Signal Region +chr3 103224300 103236500 High Signal Region +chr3 106665700 106669700 High Signal Region +chr3 106975900 106979600 High Signal Region +chr3 108751100 108755100 High Signal Region +chr3 111019500 111024600 High Signal Region +chr3 121933800 121936400 High Signal Region +chr3 122414300 122417500 High Signal Region +chr3 122735500 122796600 High Signal Region +chr3 122837000 122838700 High Signal Region +chr3 133177100 133179800 High Signal Region +chr3 133551500 133579500 High Signal Region +chr3 135437200 135439100 High Signal Region +chr3 136954600 136969200 High Signal Region +chr3 137168400 137169900 High Signal Region +chr3 138575800 138595900 High Signal Region +chr3 139190800 139194700 High Signal Region +chr3 153236200 153241300 High Signal Region +chr3 155544100 155546700 High Signal Region +chr3 156279000 156283500 High Signal Region +chr3 157080800 157093400 High Signal Region +chr3 158511300 158513100 High Signal Region +chr3 160941200 160948700 High Signal Region +chr3 161001900 161014100 High Signal Region +chr3 165573100 165591000 High Signal Region +chr3 166228200 166232400 High Signal Region +chr3 168012100 168016800 High Signal Region +chr3 170567000 170569900 High Signal Region +chr3 170864300 170881400 High Signal Region +chr3 171626600 171637700 High Signal Region +chr3 174829200 174831800 High Signal Region +chr3 176828700 176833000 High Signal Region +chr3 177660600 177664000 High Signal Region +chr3 178926800 178941300 High Signal Region +chr3 183016900 183019100 High Signal Region +chr3 183955400 183958700 High Signal Region +chr3 187893900 187896100 
High Signal Region +chr3 192739300 192742700 High Signal Region +chr3 194323600 194334900 High Signal Region +chr3 195477900 195507300 High Signal Region +chr3 195616000 195750100 High Signal Region +chr3 195775500 195791400 High Signal Region +chr3 195914100 196028300 High Signal Region +chr3 196249400 196251900 High Signal Region +chr3 196897800 196899800 High Signal Region +chr3 197030600 197035800 High Signal Region +chr3 197383400 197428800 High Signal Region +chr3 197454700 197460800 High Signal Region +chr3 197598400 197680900 High Signal Region +chr3 198099800 198295500 High Signal Region +chr4 0 69200 High Signal Region +chr4 554100 556500 High Signal Region +chr4 1427000 1468900 High Signal Region +chr4 6002700 6005700 High Signal Region +chr4 7863000 7865000 High Signal Region +chr4 9212700 9369600 High Signal Region +chr4 40291700 40318200 High Signal Region +chr4 49077200 51816100 High Signal Region +chr4 55327200 55329200 High Signal Region +chr4 77994000 78009600 High Signal Region +chr4 119274400 119301700 High Signal Region +chr4 146285100 146305300 High Signal Region +chr4 162420500 162422400 High Signal Region +chr4 166554300 166581300 Low Mappability +chr4 181238800 181242300 Low Mappability +chr4 189232500 189236300 High Signal Region +chr4 189834900 189849700 High Signal Region +chr4 189877500 190023700 High Signal Region +chr4 190048600 190214500 High Signal Region +chr5 0 44100 High Signal Region +chr5 548300 564100 High Signal Region +chr5 647600 651700 High Signal Region +chr5 1326100 1334600 High Signal Region +chr5 2144600 2147800 High Signal Region +chr5 2489800 2491700 High Signal Region +chr5 3322100 3325100 High Signal Region +chr5 6967700 6971700 High Signal Region +chr5 17516800 17600200 High Signal Region +chr5 21477600 21497600 High Signal Region +chr5 25381400 25384300 High Signal Region +chr5 34177900 34244800 High Signal Region +chr5 45522900 45525200 High Signal Region +chr5 45743000 45744800 High Signal Region +chr5 46433900 
46687700 High Signal Region +chr5 46708100 50165300 High Signal Region +chr5 60759700 60762500 High Signal Region +chr5 63320900 63335500 High Signal Region +chr5 69540700 71359500 High Signal Region +chr5 71850000 71852800 High Signal Region +chr5 74685400 74712400 High Signal Region +chr5 78452400 78457600 High Signal Region +chr5 78848400 78872800 High Signal Region +chr5 80649100 80653100 High Signal Region +chr5 85641800 85662700 High Signal Region +chr5 93947500 93948900 High Signal Region +chr5 94567100 94570400 High Signal Region +chr5 100045500 100076300 High Signal Region +chr5 106425500 106429500 High Signal Region +chr5 109259500 109265400 High Signal Region +chr5 111302100 111308300 High Signal Region +chr5 114156700 114158300 High Signal Region +chr5 119904000 119905600 High Signal Region +chr5 123760300 123762200 High Signal Region +chr5 134922500 134929400 High Signal Region +chr5 139005500 139011600 High Signal Region +chr5 146610000 146615500 High Signal Region +chr5 153071100 153077000 High Signal Region +chr5 156658300 156665400 High Signal Region +chr5 161606000 161611700 High Signal Region +chr5 171083900 171090200 High Signal Region +chr5 175904500 176118000 High Signal Region +chr5 176590700 176593000 High Signal Region +chr5 177636700 177684700 High Signal Region +chr5 177960500 177981400 High Signal Region +chr5 178584600 178586600 High Signal Region +chr5 181172600 181538200 High Signal Region +chr6 256500 382800 High Signal Region +chr6 861500 864200 High Signal Region +chr6 1052800 1054800 High Signal Region +chr6 26669200 26832300 High Signal Region +chr6 33484600 33486400 High Signal Region +chr6 34070600 34074000 High Signal Region +chr6 38262000 38301600 High Signal Region +chr6 39455800 39460500 High Signal Region +chr6 44043600 44080000 High Signal Region +chr6 44180600 44182900 High Signal Region +chr6 51874900 51901300 High Signal Region +chr6 54961900 54967400 High Signal Region +chr6 58432200 60242300 High Signal Region +chr6 
61321800 61493000 High Signal Region +chr6 61573200 61575100 Low Mappability +chr6 61661900 61673400 High Signal Region +chr6 103709000 103715100 High Signal Region +chr6 115254900 115256800 High Signal Region +chr6 143799900 143801800 High Signal Region +chr6 156035300 156040100 High Signal Region +chr6 157309500 157324800 High Signal Region +chr6 160611700 160647400 High Signal Region +chr6 170145300 170147200 High Signal Region +chr6 170376900 170401000 High Signal Region +chr6 170465400 170468400 High Signal Region +chr7 0 49600 High Signal Region +chr7 224500 241300 High Signal Region +chr7 904700 907100 Low Mappability +chr7 1271400 1273500 High Signal Region +chr7 45251000 45253000 High Signal Region +chr7 56369500 56375600 High Signal Region +chr7 57485300 57497800 High Signal Region +chr7 57611600 57637700 Low Mappability +chr7 58031800 60997400 High Signal Region +chr7 61017800 61075200 High Signal Region +chr7 61102900 61725200 Low Mappability +chr7 62265700 62409500 High Signal Region +chr7 62430000 62520600 High Signal Region +chr7 65488000 65496500 High Signal Region +chr7 100951400 100968300 High Signal Region +chr7 100991500 101004600 High Signal Region +chr7 102474700 102686400 High Signal Region +chr7 142665100 142668500 High Signal Region +chr7 144180800 144377300 Low Mappability +chr7 145996400 146018600 High Signal Region +chr7 152375800 152435100 High Signal Region +chr7 158131400 158156200 High Signal Region +chr7 158594300 158596200 High Signal Region +chr7 158893100 158918100 High Signal Region +chr7 159334900 159345900 High Signal Region +chr8 7209800 7914700 High Signal Region +chr8 7940500 8075700 High Signal Region +chr8 8128200 8204600 High Signal Region +chr8 12136900 12614300 High Signal Region +chr8 43236700 43262600 High Signal Region +chr8 43937900 45969600 High Signal Region +chr8 46829400 46832000 High Signal Region +chr8 57204900 57216100 High Signal Region +chr8 59168700 59170400 High Signal Region +chr8 67584500 67592700 High 
Signal Region +chr8 69688400 69691100 High Signal Region +chr8 71406700 71412400 High Signal Region +chr8 75444100 75448200 High Signal Region +chr8 81841500 81851900 High Signal Region +chr8 85642100 85829300 High Signal Region +chr8 88685900 88691700 High Signal Region +chr8 96171200 96173100 High Signal Region +chr8 99494900 99496800 High Signal Region +chr8 105789200 105793800 High Signal Region +chr8 141491400 141493500 High Signal Region +chr8 141871100 141875200 High Signal Region +chr8 143641400 143670500 High Signal Region +chr8 144124800 144137600 High Signal Region +chr9 319900 322400 High Signal Region +chr9 33656600 33660000 High Signal Region +chr9 35912600 35915300 High Signal Region +chr9 38824200 39089400 High Signal Region +chr9 39846200 40771100 High Signal Region +chr9 40792500 41323100 High Signal Region +chr9 41492300 41635600 High Signal Region +chr9 41661300 42119600 Low Mappability +chr9 42364000 42410600 High Signal Region +chr9 42899400 42901300 High Signal Region +chr9 43263100 61518900 High Signal Region +chr9 61735300 63548000 High Signal Region +chr9 63761400 64027300 High Signal Region +chr9 64135000 65390600 High Signal Region +chr9 65579400 66874600 High Signal Region +chr9 66959000 68398100 High Signal Region +chr9 70037200 70039600 High Signal Region +chr9 76174300 76176200 High Signal Region +chr9 83222900 83226900 High Signal Region +chr9 85071600 85075100 High Signal Region +chr9 85164800 85166100 High Signal Region +chr9 108502000 108506600 High Signal Region +chr9 134164500 134185500 High Signal Region +chr9 137326800 137330600 High Signal Region +chr9 137715200 137722200 Low Mappability +chr9 137841200 137846800 Low Mappability +chr9 138222000 138394700 High Signal Region +chrX 0 329300 High Signal Region +chrX 362400 388500 High Signal Region +chrX 456500 531800 High Signal Region +chrX 723800 739500 High Signal Region +chrX 864500 930400 High Signal Region +chrX 1049100 1054300 High Signal Region +chrX 1085100 1175500 
High Signal Region +chrX 1200600 1209400 High Signal Region +chrX 1249200 1269000 High Signal Region +chrX 1289500 1298900 High Signal Region +chrX 1365300 1458700 High Signal Region +chrX 1480900 1492800 High Signal Region +chrX 1816200 1820600 High Signal Region +chrX 2223900 2521900 High Signal Region +chrX 2580600 2751300 High Signal Region +chrX 3966700 3968700 High Signal Region +chrX 5481200 5486100 High Signal Region +chrX 6933400 6938700 High Signal Region +chrX 7587600 7591800 High Signal Region +chrX 9403600 9415100 High Signal Region +chrX 10785000 10809700 High Signal Region +chrX 10966600 10976800 High Signal Region +chrX 11218800 11221100 Low Mappability +chrX 11840900 11848000 High Signal Region +chrX 14085100 14109500 High Signal Region +chrX 14286500 14289300 High Signal Region +chrX 16361200 16366000 High Signal Region +chrX 16498100 16503400 High Signal Region +chrX 19940200 19946300 High Signal Region +chrX 21340600 21345700 High Signal Region +chrX 25773300 25776000 High Signal Region +chrX 26176400 26181400 High Signal Region +chrX 30767800 30772600 High Signal Region +chrX 31077600 31082600 High Signal Region +chrX 31511400 31535800 High Signal Region +chrX 34416800 34425900 High Signal Region +chrX 36465200 36471200 High Signal Region +chrX 37628400 37633500 High Signal Region +chrX 42872300 42910700 High Signal Region +chrX 49317500 49623500 High Signal Region +chrX 50019400 50033700 High Signal Region +chrX 50056700 50066100 High Signal Region +chrX 51202300 51268100 High Signal Region +chrX 51427500 51432400 High Signal Region +chrX 52175000 52228100 High Signal Region +chrX 52442800 52538100 High Signal Region +chrX 53761700 53789500 High Signal Region +chrX 55180400 55184500 High Signal Region +chrX 56754900 56781100 Low Mappability +chrX 57712300 57719700 High Signal Region +chrX 58467900 62522800 High Signal Region +chrX 63129600 63290600 Low Mappability +chrX 67311800 67323800 High Signal Region +chrX 67626800 67632300 High Signal 
Region +chrX 68217300 68230200 High Signal Region +chrX 70600000 70603800 High Signal Region +chrX 70640600 70645000 High Signal Region +chrX 70963600 70964900 High Signal Region +chrX 71978800 71980500 High Signal Region +chrX 72489400 72490800 High Signal Region +chrX 72743200 73035800 High Signal Region +chrX 73381000 73387000 High Signal Region +chrX 73887000 73891300 High Signal Region +chrX 74660000 74718100 High Signal Region +chrX 74789000 74794000 High Signal Region +chrX 74952200 74995200 High Signal Region +chrX 78802400 78804500 High Signal Region +chrX 79765500 79789600 High Signal Region +chrX 80534100 80537000 High Signal Region +chrX 82849700 82859300 Low Mappability +chrX 83752100 83756900 High Signal Region +chrX 86046600 86076600 High Signal Region +chrX 86395500 86398100 High Signal Region +chrX 86970000 86975600 High Signal Region +chrX 87220500 87222100 High Signal Region +chrX 89060200 89062700 High Signal Region +chrX 89202500 89208400 High Signal Region +chrX 91332900 91336600 High Signal Region +chrX 93618000 93633400 High Signal Region +chrX 94863600 94868300 High Signal Region +chrX 97509600 97515000 High Signal Region +chrX 100135800 100141000 High Signal Region +chrX 100257100 100261600 High Signal Region +chrX 101471700 101474900 High Signal Region +chrX 102188700 102489200 High Signal Region +chrX 103851800 103897800 High Signal Region +chrX 106755500 106769400 High Signal Region +chrX 106813900 106830900 High Signal Region +chrX 107515800 107517200 High Signal Region +chrX 109034800 109069100 High Signal Region +chrX 109114900 109119400 High Signal Region +chrX 109520800 109525700 High Signal Region +chrX 109985900 109987300 High Signal Region +chrX 110816700 110833400 High Signal Region +chrX 111416100 111418000 High Signal Region +chrX 113141700 113143600 High Signal Region +chrX 114701600 114724300 High Signal Region +chrX 115725600 115889600 High Signal Region +chrX 116557600 116595600 High Signal Region +chrX 117874100 
117880000 High Signal Region +chrX 118009000 118037800 High Signal Region +chrX 118070900 118072700 High Signal Region +chrX 121263700 121268100 High Signal Region +chrX 121299200 121300600 High Signal Region +chrX 122528400 122550000 High Signal Region +chrX 124584300 124588400 High Signal Region +chrX 125927600 125937100 High Signal Region +chrX 126463700 126474200 High Signal Region +chrX 127116700 127122600 High Signal Region +chrX 127362200 127368300 High Signal Region +chrX 128785000 128788700 High Signal Region +chrX 129337600 129357900 High Signal Region +chrX 129388400 129408400 High Signal Region +chrX 130567700 130572000 High Signal Region +chrX 131152200 131157400 High Signal Region +chrX 131378300 131383300 High Signal Region +chrX 131664300 131670000 High Signal Region +chrX 132284600 132320400 High Signal Region +chrX 133108600 133116500 High Signal Region +chrX 135718600 135888700 High Signal Region +chrX 137074700 137079100 High Signal Region +chrX 137436600 137439300 High Signal Region +chrX 138300600 138302200 High Signal Region +chrX 139437600 139446800 High Signal Region +chrX 139621500 139622800 High Signal Region +chrX 140722400 140726100 High Signal Region +chrX 141000400 141108300 High Signal Region +chrX 142478000 142483800 High Signal Region +chrX 142892300 142911600 High Signal Region +chrX 143352000 143356500 High Signal Region +chrX 144404500 144475900 Low Mappability +chrX 147281700 147287100 High Signal Region +chrX 147653800 147659900 High Signal Region +chrX 148123500 148129000 High Signal Region +chrX 148347100 148378700 High Signal Region +chrX 149437900 149441900 High Signal Region +chrX 150024800 150026200 High Signal Region +chrX 152173800 152175100 High Signal Region +chrX 153251200 153316400 High Signal Region +chrX 154870000 154890200 High Signal Region +chrX 154938900 154945100 High Signal Region +chrX 155299600 155305100 High Signal Region +chrX 155454000 155522000 High Signal Region +chrX 155700400 155727500 High Signal 
Region +chrX 155983500 156040800 High Signal Region +chrY 4343800 4345800 High Signal Region +chrY 10246200 11041200 High Signal Region +chrY 11072100 11335300 High Signal Region +chrY 11486600 11757800 High Signal Region +chrY 26637300 57227400 High Signal Region diff --git a/assets/blacklists/v2.0/mm10-blacklist.v2.bed b/assets/blacklists/v2.0/mm10-blacklist.v2.bed new file mode 100644 index 000000000..e8ff4cc11 --- /dev/null +++ b/assets/blacklists/v2.0/mm10-blacklist.v2.bed @@ -0,0 +1,3435 @@ +chr10 0 3135400 High Signal Region +chr10 3218900 3276600 Low Mappability +chr10 3576900 3627700 Low Mappability +chr10 4191100 4197600 Low Mappability +chr10 4613500 4615400 High Signal Region +chr10 4761300 4763900 High Signal Region +chr10 5080800 5096600 Low Mappability +chr10 5580100 5586600 Low Mappability +chr10 6281200 6286700 High Signal Region +chr10 6740200 6742100 High Signal Region +chr10 7396300 7429800 High Signal Region +chr10 7633600 7636600 Low Mappability +chr10 7889700 7897500 High Signal Region +chr10 8144900 8153000 High Signal Region +chr10 8264000 8269200 High Signal Region +chr10 8382400 8404400 High Signal Region +chr10 8599200 8606400 Low Mappability +chr10 10012200 10033400 High Signal Region +chr10 10566900 10593500 High Signal Region +chr10 11218400 11224800 Low Mappability +chr10 11351800 11406300 Low Mappability +chr10 11491200 11493100 High Signal Region +chr10 11612300 11642500 High Signal Region +chr10 11692500 11701300 Low Mappability +chr10 12266500 12273000 High Signal Region +chr10 12385800 12396000 High Signal Region +chr10 13401200 13403100 High Signal Region +chr10 14559900 14577100 High Signal Region +chr10 14646300 14664500 Low Mappability +chr10 14923800 14928300 High Signal Region +chr10 15047600 15083100 High Signal Region +chr10 15528600 15534200 High Signal Region +chr10 15567000 15641800 High Signal Region +chr10 16967500 16971600 High Signal Region +chr10 17499600 17501700 High Signal Region +chr10 18555500 18558100 High 
Signal Region +chr10 19427600 19429100 High Signal Region +chr10 19538800 19546100 Low Mappability +chr10 19772200 19801600 High Signal Region +chr10 20458900 20460800 High Signal Region +chr10 21208600 21216600 Low Mappability +chr10 21278500 21313500 High Signal Region +chr10 21642200 21649600 Low Mappability +chr10 21727800 21736400 Low Mappability +chr10 22031300 22063500 High Signal Region +chr10 22127200 22164500 High Signal Region +chr10 22186700 22290500 High Signal Region +chr10 22369100 22472300 High Signal Region +chr10 22683100 22690600 Low Mappability +chr10 22935900 22941800 High Signal Region +chr10 24687500 24691700 Low Mappability +chr10 25091400 25106900 Low Mappability +chr10 25622900 25629400 Low Mappability +chr10 25968400 25973400 Low Mappability +chr10 26641500 26662800 Low Mappability +chr10 27403200 27407600 High Signal Region +chr10 27904000 27909500 High Signal Region +chr10 28908500 28940600 High Signal Region +chr10 29243900 29249600 High Signal Region +chr10 29924300 29930700 Low Mappability +chr10 29954000 29971900 High Signal Region +chr10 30553000 30577100 High Signal Region +chr10 31054900 31095900 Low Mappability +chr10 31406500 31411100 High Signal Region +chr10 31750000 31757100 Low Mappability +chr10 31878400 31885800 High Signal Region +chr10 31980100 32000400 Low Mappability +chr10 32039700 32045000 High Signal Region +chr10 32176100 32182400 High Signal Region +chr10 32499200 32529900 High Signal Region +chr10 32816400 32857200 High Signal Region +chr10 33315300 33319800 High Signal Region +chr10 33492300 33508900 High Signal Region +chr10 33886600 33901100 Low Mappability +chr10 34739400 34749100 Low Mappability +chr10 35669300 35725500 High Signal Region +chr10 36130200 36135500 High Signal Region +chr10 36160700 36166700 High Signal Region +chr10 36594500 36597500 Low Mappability +chr10 36942200 36948800 Low Mappability +chr10 37186500 37189300 High Signal Region +chr10 37799700 37821400 High Signal Region +chr10 37964600 
37970100 High Signal Region +chr10 38590100 38606100 High Signal Region +chr10 38637900 38644200 High Signal Region +chr10 38729400 38782700 High Signal Region +chr10 38933500 38956500 High Signal Region +chr10 39126700 39129400 High Signal Region +chr10 39760700 39764700 High Signal Region +chr10 41185700 41195800 High Signal Region +chr10 41840500 41859100 Low Mappability +chr10 43769400 43773800 High Signal Region +chr10 44206300 44254100 High Signal Region +chr10 45515000 45588000 Low Mappability +chr10 45624800 45628400 High Signal Region +chr10 46136500 46139300 High Signal Region +chr10 46468300 46472100 High Signal Region +chr10 46500500 46538800 High Signal Region +chr10 46789300 46812500 High Signal Region +chr10 46966700 47009000 High Signal Region +chr10 47048600 47074700 Low Mappability +chr10 47663600 47683500 High Signal Region +chr10 47743600 47758500 High Signal Region +chr10 47875400 47881600 High Signal Region +chr10 48032400 48058800 High Signal Region +chr10 48677400 48682800 High Signal Region +chr10 49823500 49842200 High Signal Region +chr10 50029200 50035300 High Signal Region +chr10 50109900 50115500 High Signal Region +chr10 50178500 50184800 High Signal Region +chr10 50253700 50296500 High Signal Region +chr10 50333400 50335300 High Signal Region +chr10 50524000 50553900 High Signal Region +chr10 51126200 51132900 High Signal Region +chr10 51436800 51448000 High Signal Region +chr10 51470300 51474900 High Signal Region +chr10 51882900 51888000 Low Mappability +chr10 52052600 52059000 Low Mappability +chr10 52089600 52148500 High Signal Region +chr10 52522600 52599800 High Signal Region +chr10 53073900 53081100 High Signal Region +chr10 53569600 53576000 Low Mappability +chr10 54216200 54222900 High Signal Region +chr10 54588800 54619900 Low Mappability +chr10 55080400 55090500 High Signal Region +chr10 55654500 55659600 High Signal Region +chr10 55715600 55751000 High Signal Region +chr10 55841700 55847900 High Signal Region +chr10 
56250200 56293900 High Signal Region +chr10 56701000 56728000 High Signal Region +chr10 56894100 56897300 High Signal Region +chr10 57099200 57153200 High Signal Region +chr10 57239100 57245400 High Signal Region +chr10 57326900 57333900 High Signal Region +chr10 57434000 57456500 High Signal Region +chr10 57678600 57684900 High Signal Region +chr10 57862800 58240900 High Signal Region +chr10 58566200 58570900 High Signal Region +chr10 59381400 59396800 Low Mappability +chr10 59850500 59922300 Low Mappability +chr10 60444900 60446800 High Signal Region +chr10 60546600 60553100 Low Mappability +chr10 61373100 61375000 High Signal Region +chr10 63103900 63111200 Low Mappability +chr10 63508800 63519000 High Signal Region +chr10 63833800 63835000 High Signal Region +chr10 64418600 64420000 High Signal Region +chr10 65166300 65172600 High Signal Region +chr10 65450400 65477700 High Signal Region +chr10 65638900 65670200 High Signal Region +chr10 65938900 65956300 Low Mappability +chr10 66422900 66431000 High Signal Region +chr10 66662400 66678300 High Signal Region +chr10 69030100 69065800 High Signal Region +chr10 70657500 70668500 High Signal Region +chr10 70785400 70798600 Low Mappability +chr10 71012700 71019200 Low Mappability +chr10 71111600 71114200 Low Mappability +chr10 71510600 71637800 High Signal Region +chr10 71691300 71698600 Low Mappability +chr10 72292400 72314300 High Signal Region +chr10 72359200 72360700 High Signal Region +chr10 72493500 72499200 High Signal Region +chr10 72590700 72591900 High Signal Region +chr10 72690900 72709500 High Signal Region +chr10 73378200 73380100 High Signal Region +chr10 73576400 73601900 High Signal Region +chr10 74433300 74439500 High Signal Region +chr10 74655700 74672200 High Signal Region +chr10 74715300 74746600 High Signal Region +chr10 74857500 74888000 High Signal Region +chr10 76835100 76852400 High Signal Region +chr10 77950600 77979500 Low Mappability +chr10 78008300 78028800 Low Mappability +chr10 78637000 
78696000 High Signal Region +chr10 78731500 78735800 High Signal Region +chr10 78803500 78823100 Low Mappability +chr10 79207800 79259400 High Signal Region +chr10 79314000 79354000 Low Mappability +chr10 80102300 80116000 High Signal Region +chr10 80928600 80996300 Low Mappability +chr10 81167600 81199400 High Signal Region +chr10 81600900 81997900 High Signal Region +chr10 82517500 82538800 High Signal Region +chr10 82571100 82575200 High Signal Region +chr10 82939800 82956300 High Signal Region +chr10 83386600 83392400 Low Mappability +chr10 83670800 83678100 Low Mappability +chr10 83768200 83792700 Low Mappability +chr10 84155900 84180800 Low Mappability +chr10 84436900 84473700 Low Mappability +chr10 84744500 84750100 Low Mappability +chr10 85413200 85419700 Low Mappability +chr10 85696600 85732800 High Signal Region +chr10 85840200 85872500 High Signal Region +chr10 86561700 86565700 High Signal Region +chr10 88628700 88658500 Low Mappability +chr10 88963900 88968200 Low Mappability +chr10 89398700 89400100 High Signal Region +chr10 89949700 89964500 High Signal Region +chr10 90249000 90255300 High Signal Region +chr10 90324500 90329800 Low Mappability +chr10 90471200 90474200 Low Mappability +chr10 91252200 91256900 High Signal Region +chr10 91928900 91944500 High Signal Region +chr10 92909200 92915800 High Signal Region +chr10 94362500 94369300 Low Mappability +chr10 94591500 94610000 High Signal Region +chr10 94871200 94873100 High Signal Region +chr10 96068700 96078800 High Signal Region +chr10 96157200 96162600 Low Mappability +chr10 96192400 96199800 Low Mappability +chr10 97320500 97329700 High Signal Region +chr10 97525500 97534200 Low Mappability +chr10 97755000 97761200 High Signal Region +chr10 97896600 97920300 High Signal Region +chr10 98337800 98343700 High Signal Region +chr10 98433100 98444100 High Signal Region +chr10 100310500 100395900 High Signal Region +chr10 102667700 102669600 High Signal Region +chr10 102859800 102861500 High Signal 
Region +chr10 103500200 103519100 High Signal Region +chr10 103547000 103548600 High Signal Region +chr10 103569600 103575200 High Signal Region +chr10 103600400 103684400 High Signal Region +chr10 103936700 103942500 High Signal Region +chr10 104380700 104382300 High Signal Region +chr10 104493600 104499800 High Signal Region +chr10 104539700 104562500 Low Mappability +chr10 104748100 104771500 High Signal Region +chr10 104819400 104862500 Low Mappability +chr10 104966900 105001700 Low Mappability +chr10 105177000 105181900 Low Mappability +chr10 105672500 105678000 Low Mappability +chr10 106166900 106235700 High Signal Region +chr10 106382800 106403000 High Signal Region +chr10 106427100 106453600 High Signal Region +chr10 106529600 106535200 Low Mappability +chr10 107125500 107136900 Low Mappability +chr10 107551800 107560700 High Signal Region +chr10 107845300 107863900 High Signal Region +chr10 107978900 108006700 Low Mappability +chr10 109212600 109216800 High Signal Region +chr10 109315100 109322400 Low Mappability +chr10 109941600 109948000 High Signal Region +chr10 110104900 110111300 Low Mappability +chr10 110504500 110516000 High Signal Region +chr10 110667700 110700900 Low Mappability +chr10 111217500 111219000 High Signal Region +chr10 112013700 112021700 High Signal Region +chr10 112053500 112058400 Low Mappability +chr10 112540600 112542100 High Signal Region +chr10 112587000 112611100 High Signal Region +chr10 112682400 112722100 Low Mappability +chr10 113722600 113729800 Low Mappability +chr10 114167300 114174900 High Signal Region +chr10 114736400 114738300 High Signal Region +chr10 114860600 114866900 High Signal Region +chr10 115641300 115643100 High Signal Region +chr10 116606200 116613400 Low Mappability +chr10 116762000 116764200 High Signal Region +chr10 116878000 116879900 High Signal Region +chr10 117476200 117491000 High Signal Region +chr10 118014300 118033200 High Signal Region +chr10 118054000 118076600 High Signal Region +chr10 
118199900 118279700 Low Mappability +chr10 118910200 118917100 High Signal Region +chr10 118937400 118953000 Low Mappability +chr10 119698800 119701600 Low Mappability +chr10 120974800 120977500 High Signal Region +chr10 121136000 121143400 Low Mappability +chr10 121164700 121169300 Low Mappability +chr10 121566100 121580200 High Signal Region +chr10 121707800 121713500 High Signal Region +chr10 121762300 121769400 High Signal Region +chr10 122141100 122166000 High Signal Region +chr10 122346900 122371300 Low Mappability +chr10 122632400 122638000 High Signal Region +chr10 122832900 122839300 High Signal Region +chr10 123792900 123797100 High Signal Region +chr10 124412900 124433300 High Signal Region +chr10 124576300 124583500 Low Mappability +chr10 124605700 124611000 Low Mappability +chr10 124680500 124686200 Low Mappability +chr10 124760500 124788800 High Signal Region +chr10 125819500 125825700 High Signal Region +chr10 125869000 125871400 High Signal Region +chr10 126262200 126291600 Low Mappability +chr10 127779500 127797900 High Signal Region +chr10 129189500 129217200 High Signal Region +chr10 129388700 129419600 Low Mappability +chr10 129443000 129454800 High Signal Region +chr10 129734500 129736400 High Signal Region +chr10 129925300 129940600 Low Mappability +chr10 130039500 130052900 High Signal Region +chr10 130396900 130408000 High Signal Region +chr10 130542000 130694900 High Signal Region +chr11 0 3201000 High Signal Region +chr11 5167600 5182600 High Signal Region +chr11 5361500 5365400 Low Mappability +chr11 5552700 5558200 Low Mappability +chr11 6141300 6148700 Low Mappability +chr11 7489400 7492300 High Signal Region +chr11 7752300 7774500 Low Mappability +chr11 8058600 8083100 Low Mappability +chr11 8354900 8370700 High Signal Region +chr11 8907200 8936100 Low Mappability +chr11 9707900 9715100 Low Mappability +chr11 9807600 9814200 Low Mappability +chr11 10252000 10266800 High Signal Region +chr11 10760200 10770800 Low Mappability +chr11 
11287200 11295100 High Signal Region +chr11 12129400 12163100 High Signal Region +chr11 12507200 12512700 Low Mappability +chr11 12561900 12569100 Low Mappability +chr11 12750500 12802700 High Signal Region +chr11 12856200 12863700 High Signal Region +chr11 12953900 12960700 Low Mappability +chr11 14896500 14922100 High Signal Region +chr11 15227600 15235000 Low Mappability +chr11 16022400 16029000 High Signal Region +chr11 16326500 16331700 High Signal Region +chr11 16418200 16419600 High Signal Region +chr11 16567100 16573100 High Signal Region +chr11 17401400 17407800 High Signal Region +chr11 18330900 18342700 High Signal Region +chr11 18773800 18780100 High Signal Region +chr11 19566100 19570600 Low Mappability +chr11 19788600 19809400 Low Mappability +chr11 20310000 20312000 High Signal Region +chr11 20377900 20380400 High Signal Region +chr11 22322000 22340700 Low Mappability +chr11 22395200 22432900 Low Mappability +chr11 22534700 22537000 Low Mappability +chr11 23218500 23258100 Low Mappability +chr11 23522600 23552900 High Signal Region +chr11 24527400 24529500 Low Mappability +chr11 25196800 25217300 High Signal Region +chr11 25796400 25802200 Low Mappability +chr11 26898500 26900500 High Signal Region +chr11 27525200 27541400 High Signal Region +chr11 28097200 28104500 Low Mappability +chr11 29064100 29129900 Low Mappability +chr11 29259900 29291300 High Signal Region +chr11 29586000 29592400 Low Mappability +chr11 30511100 30535400 High Signal Region +chr11 31343800 31345700 Low Mappability +chr11 33062300 33068800 Low Mappability +chr11 34541000 34683100 High Signal Region +chr11 37482400 37484900 High Signal Region +chr11 40230800 40248400 High Signal Region +chr11 40625500 40640300 Low Mappability +chr11 40796600 40860600 High Signal Region +chr11 40887700 40915600 High Signal Region +chr11 41631700 41633600 High Signal Region +chr11 43237300 43239300 Low Mappability +chr11 43286400 43329800 High Signal Region +chr11 43454800 43462300 Low 
Mappability +chr11 43659700 43682100 Low Mappability +chr11 45584200 45655700 Low Mappability +chr11 46412300 46415000 Low Mappability +chr11 46492800 46514400 Low Mappability +chr11 47847500 47860600 High Signal Region +chr11 48451800 48536100 High Signal Region +chr11 48929800 49060400 Low Mappability +chr11 50445100 50469600 High Signal Region +chr11 51437600 51456700 High Signal Region +chr11 51664900 51690400 Low Mappability +chr11 54135500 54141600 High Signal Region +chr11 54576500 54583300 Low Mappability +chr11 55240500 55248100 Low Mappability +chr11 56588500 56594500 High Signal Region +chr11 57301700 57303600 High Signal Region +chr11 60558900 60699000 Low Mappability +chr11 61407400 61427800 Low Mappability +chr11 61593700 61596500 Low Mappability +chr11 62879300 62901500 High Signal Region +chr11 63467600 63475000 Low Mappability +chr11 64568100 64574200 High Signal Region +chr11 64681700 64683600 Low Mappability +chr11 64791900 64827100 Low Mappability +chr11 65451700 65458800 Low Mappability +chr11 66629900 66634100 High Signal Region +chr11 66947700 66958600 Low Mappability +chr11 67866400 67872800 Low Mappability +chr11 70155800 70162400 Low Mappability +chr11 71505700 71512100 Low Mappability +chr11 71875200 71881700 Low Mappability +chr11 73436900 73439100 Low Mappability +chr11 74128800 74136200 Low Mappability +chr11 74199900 74226800 Low Mappability +chr11 74301700 74319600 High Signal Region +chr11 74540000 74548400 Low Mappability +chr11 74884300 74899000 Low Mappability +chr11 76828100 76868600 Low Mappability +chr11 77255000 77257100 Low Mappability +chr11 79845100 79847300 Low Mappability +chr11 79872400 79877100 Low Mappability +chr11 79917300 79920800 Low Mappability +chr11 81545400 81552800 Low Mappability +chr11 82123300 82144400 High Signal Region +chr11 82333900 82338400 Low Mappability +chr11 83050300 83093600 High Signal Region +chr11 83126000 83172300 Low Mappability +chr11 85046500 85067800 High Signal Region +chr11 85285400 
85292700 High Signal Region +chr11 88910900 88917600 Low Mappability +chr11 88965900 88971900 High Signal Region +chr11 89080800 89101300 High Signal Region +chr11 90504000 90510500 High Signal Region +chr11 90829400 90835000 Low Mappability +chr11 90901700 90908400 Low Mappability +chr11 90958500 91026800 Low Mappability +chr11 91047200 91049300 Low Mappability +chr11 92099000 92108200 High Signal Region +chr11 93409300 93428900 High Signal Region +chr11 94622900 94629900 Low Mappability +chr11 96065000 96093900 High Signal Region +chr11 98586900 98673900 Low Mappability +chr11 99712600 99717300 High Signal Region +chr11 100662800 100669700 Low Mappability +chr11 101731800 101741400 High Signal Region +chr11 102992300 103049900 Low Mappability +chr11 104239000 104242600 Low Mappability +chr11 106028100 106037400 High Signal Region +chr11 106254800 106297600 High Signal Region +chr11 106943500 106950100 Low Mappability +chr11 107188200 107200400 High Signal Region +chr11 107281300 107283200 High Signal Region +chr11 108377600 108404500 Low Mappability +chr11 108649800 108655400 Low Mappability +chr11 109010700 109024400 High Signal Region +chr11 109998500 110024600 Low Mappability +chr11 110421300 110423200 High Signal Region +chr11 111182400 111189800 Low Mappability +chr11 111215500 111234900 Low Mappability +chr11 111353300 111360000 Low Mappability +chr11 111855400 111857100 High Signal Region +chr11 112010600 112016400 High Signal Region +chr11 114456300 114462800 Low Mappability +chr11 115014300 115046900 Low Mappability +chr11 115611200 115665700 High Signal Region +chr11 115754800 115766900 Low Mappability +chr11 116389300 116395200 Low Mappability +chr11 116742700 116792800 Low Mappability +chr11 117499800 117505100 Low Mappability +chr11 119299800 119340300 Low Mappability +chr11 120305300 120357300 Low Mappability +chr11 120515100 120644700 High Signal Region +chr11 121069800 121075100 High Signal Region +chr11 121203000 121207500 Low Mappability +chr11 
121396100 121422700 Low Mappability +chr11 121611900 121614000 Low Mappability +chr11 121981400 122082500 High Signal Region +chr12 0 3070900 High Signal Region +chr12 3102800 3111000 High Signal Region +chr12 4110500 4112400 High Signal Region +chr12 4218500 4235300 High Signal Region +chr12 4751600 4790100 High Signal Region +chr12 5050300 5065400 High Signal Region +chr12 6514000 6525100 High Signal Region +chr12 6606500 6612600 High Signal Region +chr12 7447300 7449900 High Signal Region +chr12 7801900 7808600 High Signal Region +chr12 7925300 7939600 High Signal Region +chr12 8572000 8640600 High Signal Region +chr12 10693000 10704200 High Signal Region +chr12 10961300 11004600 High Signal Region +chr12 11187600 11194100 High Signal Region +chr12 11642900 11658000 High Signal Region +chr12 12092500 12097600 High Signal Region +chr12 14844600 14848200 High Signal Region +chr12 15026600 15032400 High Signal Region +chr12 15252700 15259600 High Signal Region +chr12 15866100 15871800 High Signal Region +chr12 16746900 16748800 High Signal Region +chr12 17116400 17129400 High Signal Region +chr12 17243500 17248500 High Signal Region +chr12 18340700 18354800 High Signal Region +chr12 18856500 18909700 High Signal Region +chr12 19312600 19413500 High Signal Region +chr12 19442600 19590100 High Signal Region +chr12 19627700 19633600 High Signal Region +chr12 19777500 19781600 High Signal Region +chr12 19879300 19901200 High Signal Region +chr12 19931800 19948600 High Signal Region +chr12 20031900 20205100 High Signal Region +chr12 20225600 20298300 High Signal Region +chr12 21914300 21916000 Low Mappability +chr12 21972100 21987900 High Signal Region +chr12 22021600 22680500 Low Mappability +chr12 22896100 22902300 High Signal Region +chr12 23140700 23225200 High Signal Region +chr12 23283500 24030600 High Signal Region +chr12 24295300 24365100 Low Mappability +chr12 24692300 24727100 High Signal Region +chr12 25591800 25595300 Low Mappability +chr12 25840400 25842100 
High Signal Region +chr12 27556800 27592000 High Signal Region +chr12 28491400 28494000 High Signal Region +chr12 28954800 28964000 High Signal Region +chr12 29379500 29400800 High Signal Region +chr12 30965100 31016300 High Signal Region +chr12 32020400 32032500 Low Mappability +chr12 32217700 32219200 High Signal Region +chr12 33388100 33410100 Low Mappability +chr12 33748900 33771800 High Signal Region +chr12 33869500 33880600 High Signal Region +chr12 34056800 34074100 High Signal Region +chr12 34128700 34139700 High Signal Region +chr12 34623000 34629000 Low Mappability +chr12 35783900 35814400 High Signal Region +chr12 36099400 36107200 High Signal Region +chr12 36679100 36700200 Low Mappability +chr12 36952200 36957900 High Signal Region +chr12 38746900 38749300 High Signal Region +chr12 41363500 41385500 High Signal Region +chr12 41502600 41516100 High Signal Region +chr12 41860000 41870200 High Signal Region +chr12 42124500 42126300 High Signal Region +chr12 42437900 42443400 High Signal Region +chr12 42666800 42690800 High Signal Region +chr12 43335600 43349300 High Signal Region +chr12 43659100 43675300 High Signal Region +chr12 43953900 43986900 High Signal Region +chr12 44064500 44070600 High Signal Region +chr12 44765600 44795900 Low Mappability +chr12 45768700 45773700 High Signal Region +chr12 45949200 45962200 High Signal Region +chr12 46707000 46709200 High Signal Region +chr12 47027300 47039300 High Signal Region +chr12 47280500 47286800 High Signal Region +chr12 47328600 47331300 High Signal Region +chr12 47646800 47648300 High Signal Region +chr12 47833000 47834900 High Signal Region +chr12 47995600 47997600 High Signal Region +chr12 48842900 48849500 High Signal Region +chr12 49124800 49155700 High Signal Region +chr12 49245200 49272100 High Signal Region +chr12 49606200 49612000 High Signal Region +chr12 50784600 50789900 High Signal Region +chr12 51486000 51492000 High Signal Region +chr12 52157900 52176400 High Signal Region +chr12 52200400 
52223200 High Signal Region +chr12 52579600 52581200 High Signal Region +chr12 52730000 52735400 Low Mappability +chr12 52906200 52952300 High Signal Region +chr12 54358500 54369200 High Signal Region +chr12 54705400 54743600 High Signal Region +chr12 55079600 55267300 Low Mappability +chr12 56104100 56110600 Low Mappability +chr12 56423700 56425000 High Signal Region +chr12 56747800 56752200 High Signal Region +chr12 56911000 56914000 High Signal Region +chr12 58294800 58339800 High Signal Region +chr12 58659000 58692900 High Signal Region +chr12 58858800 58867600 High Signal Region +chr12 59034800 59039300 Low Mappability +chr12 59112800 59124700 High Signal Region +chr12 59270000 59276700 High Signal Region +chr12 59297800 59323200 High Signal Region +chr12 59601000 59605800 High Signal Region +chr12 60069500 60084400 High Signal Region +chr12 60501200 60506200 High Signal Region +chr12 61044200 61045300 High Signal Region +chr12 61289100 61293700 High Signal Region +chr12 61892600 61896100 High Signal Region +chr12 61964500 61971300 High Signal Region +chr12 62035300 62090200 High Signal Region +chr12 62959800 62999500 High Signal Region +chr12 63041800 63048200 High Signal Region +chr12 63289500 63322400 High Signal Region +chr12 63728400 63745100 High Signal Region +chr12 63838200 63840100 High Signal Region +chr12 65260100 65292400 High Signal Region +chr12 65784500 65808300 High Signal Region +chr12 66103800 66127200 High Signal Region +chr12 67058200 67060800 High Signal Region +chr12 67433500 67459300 High Signal Region +chr12 67519200 67571500 High Signal Region +chr12 67828900 67836600 High Signal Region +chr12 68696500 68711800 High Signal Region +chr12 68745100 68750600 Low Mappability +chr12 69059900 69061300 High Signal Region +chr12 69653100 69657800 High Signal Region +chr12 70641800 70668400 Low Mappability +chr12 71077100 71093600 Low Mappability +chr12 71589600 71596000 High Signal Region +chr12 72203000 72209300 High Signal Region +chr12 
72634700 72641300 High Signal Region +chr12 74620800 74642100 High Signal Region +chr12 74775800 74778200 High Signal Region +chr12 74803000 74805400 High Signal Region +chr12 74857200 74862700 High Signal Region +chr12 75241800 75248400 High Signal Region +chr12 77160700 77166000 High Signal Region +chr12 77383500 77411300 High Signal Region +chr12 77547200 77553900 High Signal Region +chr12 78260000 78373200 High Signal Region +chr12 78462400 78468500 High Signal Region +chr12 80417200 80449700 High Signal Region +chr12 80894500 80916600 High Signal Region +chr12 81550400 81555100 High Signal Region +chr12 81985400 82064000 Low Mappability +chr12 83093000 83094900 High Signal Region +chr12 85401000 85408600 High Signal Region +chr12 87585600 87771500 Low Mappability +chr12 87802800 88006400 High Signal Region +chr12 88119800 88169700 Low Mappability +chr12 88229600 88312400 High Signal Region +chr12 88493200 88516700 Low Mappability +chr12 91221400 91256000 High Signal Region +chr12 91439200 91475500 High Signal Region +chr12 92393800 92395800 Low Mappability +chr12 92839700 92892700 High Signal Region +chr12 93233800 93265600 High Signal Region +chr12 93564200 93590500 High Signal Region +chr12 93915400 93951600 High Signal Region +chr12 94268500 94273900 High Signal Region +chr12 94550200 94556100 High Signal Region +chr12 94694300 94713700 High Signal Region +chr12 95976100 96021400 High Signal Region +chr12 97038100 97062700 High Signal Region +chr12 97616600 97622400 High Signal Region +chr12 98173700 98176600 High Signal Region +chr12 99644200 99649400 High Signal Region +chr12 100490600 100492300 High Signal Region +chr12 100766900 100825300 High Signal Region +chr12 101427900 101453500 High Signal Region +chr12 101839700 101849500 High Signal Region +chr12 102892000 102893900 High Signal Region +chr12 103458100 103472900 High Signal Region +chr12 103776900 103813700 High Signal Region +chr12 105300300 105307000 High Signal Region +chr12 105435200 
105437100 High Signal Region +chr12 105523800 105525700 High Signal Region +chr12 105628200 105631400 High Signal Region +chr12 108078800 108084400 High Signal Region +chr12 109901900 109909200 Low Mappability +chr12 110011800 110013700 High Signal Region +chr12 111388200 111417100 High Signal Region +chr12 112542200 112548700 High Signal Region +chr12 112775700 112830900 Low Mappability +chr12 113423500 113461500 High Signal Region +chr12 114584600 114597100 High Signal Region +chr12 114941500 114943900 High Signal Region +chr12 115725800 115748700 High Signal Region +chr12 116796500 116853000 High Signal Region +chr12 118341100 118358400 High Signal Region +chr12 118794900 118797400 High Signal Region +chr12 119013600 119018100 High Signal Region +chr12 119554500 119598100 High Signal Region +chr12 119659100 119670900 High Signal Region +chr12 120023800 120129000 High Signal Region +chr13 0 3038200 High Signal Region +chr13 3350900 3378900 High Signal Region +chr13 3404500 3438200 High Signal Region +chr13 3901100 3903100 Low Mappability +chr13 4762900 4770300 High Signal Region +chr13 5171400 5178400 High Signal Region +chr13 7601300 7604100 High Signal Region +chr13 7806100 7810900 High Signal Region +chr13 7893500 7899700 High Signal Region +chr13 9828900 9855900 High Signal Region +chr13 10174800 10181100 Low Mappability +chr13 12684400 13073000 High Signal Region +chr13 13752100 13774000 High Signal Region +chr13 13859900 13907900 High Signal Region +chr13 13981000 13983000 High Signal Region +chr13 14690600 14777500 Low Mappability +chr13 18932700 18963600 Low Mappability +chr13 21753300 21847200 Low Mappability +chr13 23620800 23647900 Low Mappability +chr13 25006900 25051500 High Signal Region +chr13 26440600 26448200 High Signal Region +chr13 27164600 27169100 High Signal Region +chr13 27875800 27888500 High Signal Region +chr13 29880700 29886800 Low Mappability +chr13 32889400 32895200 High Signal Region +chr13 33280200 33319400 High Signal Region 
+chr13 33350500 33491800 High Signal Region +chr13 35687400 35695700 High Signal Region +chr13 36794200 36797400 High Signal Region +chr13 37036700 37043900 High Signal Region +chr13 38633900 38659300 Low Mappability +chr13 42435800 42437700 High Signal Region +chr13 44868600 44870900 High Signal Region +chr13 46316600 46324000 High Signal Region +chr13 50633400 50741800 High Signal Region +chr13 53269000 53270900 High Signal Region +chr13 60675600 60682600 High Signal Region +chr13 62291600 62346800 Low Mappability +chr13 62409800 62426300 High Signal Region +chr13 63142500 63184600 High Signal Region +chr13 64878100 64885300 High Signal Region +chr13 65352900 66254300 Low Mappability +chr13 71381400 71387500 High Signal Region +chr13 74521500 74565200 High Signal Region +chr13 74684000 74712200 High Signal Region +chr13 76472300 76501300 High Signal Region +chr13 77304000 77305900 High Signal Region +chr13 77430600 77440000 High Signal Region +chr13 79563400 79570800 High Signal Region +chr13 80276300 80279400 High Signal Region +chr13 80489100 80491400 High Signal Region +chr13 83419000 83444300 High Signal Region +chr13 85125800 85145900 High Signal Region +chr13 86149500 86190600 High Signal Region +chr13 86502700 86511700 High Signal Region +chr13 88324900 88345400 High Signal Region +chr13 92599100 92625400 Low Mappability +chr13 93279200 93294800 High Signal Region +chr13 93650100 93651500 High Signal Region +chr13 93940300 93955300 High Signal Region +chr13 94016300 94020800 High Signal Region +chr13 97189600 97206100 High Signal Region +chr13 98418200 98420500 Low Mappability +chr13 99774000 99792100 High Signal Region +chr13 102381900 102387900 High Signal Region +chr13 105123500 105128600 Low Mappability +chr13 107839000 107860300 Low Mappability +chr13 110602100 110615800 High Signal Region +chr13 110729600 110745400 High Signal Region +chr13 111187700 111189500 High Signal Region +chr13 111499700 111515900 Low Mappability +chr13 112577200 112595200 
High Signal Region +chr13 113171200 113173100 High Signal Region +chr13 113272600 113310700 High Signal Region +chr13 115498200 115504200 High Signal Region +chr13 115741300 115743200 Low Mappability +chr13 116191900 116193900 High Signal Region +chr13 119188100 119230700 High Signal Region +chr13 119486800 119618500 High Signal Region +chr13 119660800 119674100 High Signal Region +chr13 119899200 120147600 Low Mappability +chr13 120320500 120421600 High Signal Region +chr14 0 4323000 High Signal Region +chr14 4372100 4741400 High Signal Region +chr14 4762800 5839200 High Signal Region +chr14 5959700 6479300 High Signal Region +chr14 6500100 6791800 High Signal Region +chr14 6993800 7734200 High Signal Region +chr14 7869900 7872200 High Signal Region +chr14 8005200 8018900 High Signal Region +chr14 8285700 8287800 High Signal Region +chr14 8652200 8658800 Low Mappability +chr14 10086500 10118400 High Signal Region +chr14 10178800 10198700 Low Mappability +chr14 11046200 11050200 High Signal Region +chr14 12536700 12538700 High Signal Region +chr14 14333600 14340200 High Signal Region +chr14 15460700 15467200 High Signal Region +chr14 16907800 16914000 High Signal Region +chr14 16937900 16941100 High Signal Region +chr14 18487900 18494100 High Signal Region +chr14 19251900 19255700 High Signal Region +chr14 19277200 19279100 High Signal Region +chr14 19414800 19633500 High Signal Region +chr14 21360400 21366100 High Signal Region +chr14 21878600 21884500 High Signal Region +chr14 22542900 22570000 High Signal Region +chr14 22902100 22934800 High Signal Region +chr14 25875200 26292200 High Signal Region +chr14 26946900 26948800 High Signal Region +chr14 29001300 29003200 Low Mappability +chr14 29343900 29345700 Low Mappability +chr14 30748800 30754700 High Signal Region +chr14 31919300 31923900 High Signal Region +chr14 32115300 32120500 Low Mappability +chr14 33667700 33670000 Low Mappability +chr14 33981000 33987500 Low Mappability +chr14 35275300 35281500 High 
Signal Region +chr14 35709400 35722200 High Signal Region +chr14 36429100 36440100 High Signal Region +chr14 37229100 37260800 Low Mappability +chr14 37619400 37635200 Low Mappability +chr14 38086800 38116800 High Signal Region +chr14 38280800 38283100 High Signal Region +chr14 38455100 38462200 Low Mappability +chr14 39580800 39607200 High Signal Region +chr14 39731900 39737200 High Signal Region +chr14 39905500 39911100 High Signal Region +chr14 41053200 41061900 Low Mappability +chr14 41326900 43109000 High Signal Region +chr14 43132400 43668900 High Signal Region +chr14 43803900 43850200 High Signal Region +chr14 44149300 44152100 High Signal Region +chr14 44273800 44343500 High Signal Region +chr14 44514200 44516000 Low Mappability +chr14 45726200 45753500 High Signal Region +chr14 45811900 45813800 High Signal Region +chr14 46269900 46274300 High Signal Region +chr14 47609500 47630400 High Signal Region +chr14 50538900 50606000 High Signal Region +chr14 50626200 50638500 High Signal Region +chr14 51472000 51515400 High Signal Region +chr14 51730700 51768100 High Signal Region +chr14 51814200 51837200 High Signal Region +chr14 52821200 53035800 Low Mappability +chr14 53146700 53340000 High Signal Region +chr14 53475200 53479600 High Signal Region +chr14 53515600 53530500 Low Mappability +chr14 56447800 56455700 High Signal Region +chr14 56693100 56695000 High Signal Region +chr14 58052600 58059800 Low Mappability +chr14 58462700 58464600 Low Mappability +chr14 58657800 58659700 High Signal Region +chr14 58831400 58833300 High Signal Region +chr14 59250300 59270000 High Signal Region +chr14 59488900 59490800 High Signal Region +chr14 59980800 59995700 High Signal Region +chr14 60328300 60357300 High Signal Region +chr14 60960000 60961900 Low Mappability +chr14 61580500 61586700 High Signal Region +chr14 61855000 61856300 High Signal Region +chr14 62107300 62126200 High Signal Region +chr14 64290100 64292500 High Signal Region +chr14 64463300 64478500 Low 
Mappability +chr14 65128900 65135300 Low Mappability +chr14 66427000 66428400 High Signal Region +chr14 68232600 68278200 High Signal Region +chr14 69161000 69163400 High Signal Region +chr14 70974500 70975600 High Signal Region +chr14 71121300 71126700 High Signal Region +chr14 71449700 71453700 High Signal Region +chr14 71783600 71804000 High Signal Region +chr14 72900100 72921400 High Signal Region +chr14 73644600 73679900 High Signal Region +chr14 73847900 73861200 High Signal Region +chr14 74039300 74066900 High Signal Region +chr14 74124400 74138500 High Signal Region +chr14 74435600 74447800 High Signal Region +chr14 75425300 75440500 High Signal Region +chr14 78162300 78168200 High Signal Region +chr14 78401700 78403200 High Signal Region +chr14 79145300 79196400 High Signal Region +chr14 80148100 80150800 High Signal Region +chr14 80422800 80439400 High Signal Region +chr14 80622600 80627700 High Signal Region +chr14 81333200 81337500 High Signal Region +chr14 81495300 81519300 High Signal Region +chr14 82077600 82084900 High Signal Region +chr14 82846900 82867200 High Signal Region +chr14 82958700 82964100 High Signal Region +chr14 83292900 83306500 High Signal Region +chr14 83507000 83512600 High Signal Region +chr14 84354700 84409800 High Signal Region +chr14 84855100 84881600 Low Mappability +chr14 85177800 85203300 Low Mappability +chr14 85521200 85535200 Low Mappability +chr14 86198000 86200000 High Signal Region +chr14 86590500 86614400 High Signal Region +chr14 87354600 87373000 High Signal Region +chr14 87671400 87677500 High Signal Region +chr14 87790500 87852200 High Signal Region +chr14 88450200 88453600 High Signal Region +chr14 88478400 88480300 High Signal Region +chr14 90018300 90019500 High Signal Region +chr14 90294700 90301800 High Signal Region +chr14 90910200 90912200 High Signal Region +chr14 91415900 91418400 High Signal Region +chr14 91510800 91514900 High Signal Region +chr14 91672700 91694800 High Signal Region +chr14 91951700 
91976400 High Signal Region +chr14 92032500 92040900 High Signal Region +chr14 92383600 92389900 High Signal Region +chr14 92411600 92432900 High Signal Region +chr14 92792600 92798500 High Signal Region +chr14 92921100 92953200 High Signal Region +chr14 93017600 93020400 High Signal Region +chr14 93355600 93360200 High Signal Region +chr14 94319700 94327000 High Signal Region +chr14 95561600 95567600 High Signal Region +chr14 96048000 96054300 High Signal Region +chr14 96093600 96116100 High Signal Region +chr14 97323800 97326500 High Signal Region +chr14 98226800 98237000 High Signal Region +chr14 98731900 98757200 High Signal Region +chr14 99207100 99208200 High Signal Region +chr14 99649700 99655500 High Signal Region +chr14 101076400 101098900 Low Mappability +chr14 101404800 101414800 High Signal Region +chr14 102548900 102565300 High Signal Region +chr14 102755800 102762600 High Signal Region +chr14 103300300 103302400 High Signal Region +chr14 103858600 103872900 High Signal Region +chr14 103999500 104025500 High Signal Region +chr14 104104800 104128100 Low Mappability +chr14 104704500 104716800 High Signal Region +chr14 105758200 105764900 Low Mappability +chr14 105911400 105978300 High Signal Region +chr14 106002700 106005700 Low Mappability +chr14 106301000 106352700 High Signal Region +chr14 106444800 106483100 Low Mappability +chr14 106722600 106728700 High Signal Region +chr14 106895300 106897000 Low Mappability +chr14 108115100 108174900 Low Mappability +chr14 108283900 108303500 High Signal Region +chr14 109675300 109681200 High Signal Region +chr14 109911500 109917800 High Signal Region +chr14 110057000 110108200 Low Mappability +chr14 110356200 110373800 High Signal Region +chr14 110492000 110495700 Low Mappability +chr14 110906100 110908200 High Signal Region +chr14 110992800 110994500 High Signal Region +chr14 111903200 111909800 High Signal Region +chr14 112074600 112092300 High Signal Region +chr14 112210500 112215800 High Signal Region +chr14 
112285400 112291900 High Signal Region +chr14 112332800 112340000 Low Mappability +chr14 112517900 112519900 High Signal Region +chr14 112627800 112663100 Low Mappability +chr14 114505900 114512900 High Signal Region +chr14 114822000 114823900 Low Mappability +chr14 115109700 115117400 High Signal Region +chr14 115272500 115280200 High Signal Region +chr14 115379200 115385600 High Signal Region +chr14 115911100 115912900 High Signal Region +chr14 115958100 115965000 High Signal Region +chr14 116402700 116407700 High Signal Region +chr14 116817000 116822900 High Signal Region +chr14 117285800 117292800 High Signal Region +chr14 118144700 118168500 Low Mappability +chr14 119286000 119287900 High Signal Region +chr14 120180000 120202600 High Signal Region +chr14 120742600 120749700 High Signal Region +chr14 120777500 120802300 High Signal Region +chr14 121007000 121010900 Low Mappability +chr14 122502500 122534800 High Signal Region +chr14 123349400 123351300 Low Mappability +chr14 123412000 123452600 High Signal Region +chr14 123674600 123695600 High Signal Region +chr14 124334000 124340200 High Signal Region +chr14 124415600 124436400 High Signal Region +chr14 124491600 124497700 High Signal Region +chr14 124739500 124902200 High Signal Region +chr15 0 3125600 High Signal Region +chr15 3150900 3170400 High Signal Region +chr15 3313900 3336200 High Signal Region +chr15 3360500 3363700 High Signal Region +chr15 3538600 3551000 High Signal Region +chr15 3712200 3732700 High Signal Region +chr15 3793500 3823000 High Signal Region +chr15 4155900 4160900 High Signal Region +chr15 4278500 4284100 High Signal Region +chr15 4852000 4894600 Low Mappability +chr15 4980200 4987600 Low Mappability +chr15 5369000 5385500 High Signal Region +chr15 5681700 5690400 High Signal Region +chr15 5910000 5911700 High Signal Region +chr15 5993500 5995400 High Signal Region +chr15 6074100 6087100 Low Mappability +chr15 6192800 6200000 Low Mappability +chr15 6316000 6317900 High Signal 
Region +chr15 6510500 6539100 High Signal Region +chr15 6674800 6701400 High Signal Region +chr15 6801200 6808300 High Signal Region +chr15 7539900 7548600 Low Mappability +chr15 7800800 7803000 Low Mappability +chr15 7849400 7855600 High Signal Region +chr15 7904400 7929500 Low Mappability +chr15 8517500 8520400 High Signal Region +chr15 8548000 8576100 Low Mappability +chr15 8800200 8808700 High Signal Region +chr15 8985200 9054800 High Signal Region +chr15 9219000 9224900 Low Mappability +chr15 9293200 9333300 High Signal Region +chr15 9379300 9409100 High Signal Region +chr15 9437100 9443600 High Signal Region +chr15 9536500 9554100 High Signal Region +chr15 9992700 10045700 High Signal Region +chr15 10579600 10591500 Low Mappability +chr15 10753400 10810200 High Signal Region +chr15 10835200 10854700 Low Mappability +chr15 11921000 11933300 High Signal Region +chr15 12055800 12063200 Low Mappability +chr15 12526800 12531900 Low Mappability +chr15 12872000 12873900 High Signal Region +chr15 12932300 12934200 Low Mappability +chr15 13919500 13948300 High Signal Region +chr15 14414600 14439100 Low Mappability +chr15 14722200 14732900 High Signal Region +chr15 14873900 14902400 High Signal Region +chr15 15043600 15059700 High Signal Region +chr15 15525500 15551900 High Signal Region +chr15 16168200 16186400 High Signal Region +chr15 16303700 16309500 High Signal Region +chr15 16716400 16717500 High Signal Region +chr15 16901300 16907100 High Signal Region +chr15 16939800 16955100 Low Mappability +chr15 17139000 17169100 High Signal Region +chr15 17562100 17581400 High Signal Region +chr15 18314600 18325000 High Signal Region +chr15 19038400 19063800 Low Mappability +chr15 19402600 19405500 High Signal Region +chr15 19448100 19453900 High Signal Region +chr15 19557200 19578000 High Signal Region +chr15 19626800 19631800 High Signal Region +chr15 19678400 19685800 High Signal Region +chr15 20063000 20067500 High Signal Region +chr15 20155100 20170700 Low Mappability 
+chr15 20474900 20510100 High Signal Region +chr15 20531400 20537100 High Signal Region +chr15 20821500 20826700 High Signal Region +chr15 20972700 20978300 Low Mappability +chr15 21114000 21115900 High Signal Region +chr15 21262100 21268500 Low Mappability +chr15 21423200 21487200 High Signal Region +chr15 21655500 21657500 High Signal Region +chr15 21815500 21820800 High Signal Region +chr15 21853700 21892400 High Signal Region +chr15 22268700 22293500 High Signal Region +chr15 22751400 22756700 Low Mappability +chr15 22799300 22809700 Low Mappability +chr15 23240200 23255600 Low Mappability +chr15 23465300 23467800 High Signal Region +chr15 23886000 23887900 Low Mappability +chr15 23926900 23939700 High Signal Region +chr15 24309300 24325700 Low Mappability +chr15 24761100 24766700 High Signal Region +chr15 24801600 24837300 High Signal Region +chr15 24880900 24898600 Low Mappability +chr15 25051400 25065200 Low Mappability +chr15 26112700 26118900 High Signal Region +chr15 26905000 26919300 Low Mappability +chr15 27286100 27326800 High Signal Region +chr15 27384100 27390300 Low Mappability +chr15 27638200 27640500 High Signal Region +chr15 28564400 28578800 High Signal Region +chr15 29285200 29291500 Low Mappability +chr15 29347600 29395600 High Signal Region +chr15 29463900 29470200 High Signal Region +chr15 29969800 30001400 High Signal Region +chr15 30117700 30126200 High Signal Region +chr15 30441400 30448200 Low Mappability +chr15 30747900 30755000 High Signal Region +chr15 30996700 31016300 High Signal Region +chr15 31066700 31083700 High Signal Region +chr15 32783900 32806700 High Signal Region +chr15 32832800 32880300 High Signal Region +chr15 33138700 33140800 Low Mappability +chr15 33308700 33310800 Low Mappability +chr15 33444200 33454100 High Signal Region +chr15 33710200 33745700 High Signal Region +chr15 33781400 33849400 High Signal Region +chr15 33869800 33884700 High Signal Region +chr15 34494500 34502100 Low Mappability +chr15 34763100 
34769400 High Signal Region +chr15 34987600 34992800 High Signal Region +chr15 35013200 35015400 High Signal Region +chr15 35366800 35406000 High Signal Region +chr15 36715200 36737400 High Signal Region +chr15 36966700 36997400 Low Mappability +chr15 37072900 37150800 Low Mappability +chr15 38462300 38484300 Low Mappability +chr15 39172900 39178300 Low Mappability +chr15 39335600 39348800 Low Mappability +chr15 39496100 39499100 High Signal Region +chr15 39695600 39718600 Low Mappability +chr15 40049600 40056000 High Signal Region +chr15 40086800 40101400 High Signal Region +chr15 41531400 41533200 High Signal Region +chr15 41890400 41896900 Low Mappability +chr15 42354900 42361100 High Signal Region +chr15 42925300 42942800 High Signal Region +chr15 43287300 43346300 High Signal Region +chr15 44469100 44476400 High Signal Region +chr15 44649000 44659600 Low Mappability +chr15 44723200 44728200 Low Mappability +chr15 44769700 44796100 High Signal Region +chr15 45005100 45009300 High Signal Region +chr15 45194600 45197100 High Signal Region +chr15 45577500 45590900 High Signal Region +chr15 45635600 45650500 High Signal Region +chr15 45774400 45779700 High Signal Region +chr15 45890700 45932500 High Signal Region +chr15 46255700 46257800 Low Mappability +chr15 46355600 46368400 High Signal Region +chr15 46502200 46506800 Low Mappability +chr15 46562500 46566200 Low Mappability +chr15 47232800 47256000 High Signal Region +chr15 47356500 47363700 Low Mappability +chr15 47539000 47555300 High Signal Region +chr15 48666900 48671000 High Signal Region +chr15 49283300 49299700 High Signal Region +chr15 49322600 49327300 Low Mappability +chr15 50426100 50442800 High Signal Region +chr15 50557700 50642600 High Signal Region +chr15 51113200 51117800 High Signal Region +chr15 51531900 51533900 Low Mappability +chr15 52125800 52131200 High Signal Region +chr15 52329800 52353100 High Signal Region +chr15 53039200 53044200 Low Mappability +chr15 53831000 53834900 High Signal 
Region +chr15 53870700 53872700 High Signal Region +chr15 53918300 53929500 High Signal Region +chr15 54180700 54211500 Low Mappability +chr15 56032900 56038200 High Signal Region +chr15 56175800 56183100 Low Mappability +chr15 56363800 56367900 High Signal Region +chr15 56400500 56402200 High Signal Region +chr15 56941600 56993500 High Signal Region +chr15 57279500 57285000 High Signal Region +chr15 57412200 57433600 High Signal Region +chr15 57889500 57913700 Low Mappability +chr15 58437200 58441100 High Signal Region +chr15 59421400 59435400 Low Mappability +chr15 59850100 59875200 Low Mappability +chr15 60153100 60203900 High Signal Region +chr15 60592000 60594300 Low Mappability +chr15 60931800 60986500 High Signal Region +chr15 61148600 61150700 High Signal Region +chr15 61903100 61915500 High Signal Region +chr15 62367600 62370100 High Signal Region +chr15 62553200 62555200 High Signal Region +chr15 62686500 62693700 High Signal Region +chr15 63329400 63346600 Low Mappability +chr15 63626000 63627900 High Signal Region +chr15 63791700 63796000 High Signal Region +chr15 63837600 63922800 High Signal Region +chr15 64591700 64598200 Low Mappability +chr15 64673500 64681900 High Signal Region +chr15 65115600 65123500 Low Mappability +chr15 65598500 65604500 High Signal Region +chr15 65666600 65673800 High Signal Region +chr15 65714400 65753500 High Signal Region +chr15 66045100 66065700 High Signal Region +chr15 66208300 66210200 High Signal Region +chr15 68136300 68137800 Low Mappability +chr15 68980000 68986500 High Signal Region +chr15 69122300 69164500 High Signal Region +chr15 69264900 69268800 High Signal Region +chr15 69390300 69409400 High Signal Region +chr15 69642000 69646000 High Signal Region +chr15 70083000 70088800 High Signal Region +chr15 70609300 70611100 High Signal Region +chr15 70896600 70914000 High Signal Region +chr15 71104600 71112200 High Signal Region +chr15 71206600 71237500 Low Mappability +chr15 73060200 73087900 Low Mappability 
+chr15 73373200 73378200 Low Mappability +chr15 73873000 73880400 Low Mappability +chr15 74360700 74368000 Low Mappability +chr15 74814300 74826700 Low Mappability +chr15 74992000 75104600 High Signal Region +chr15 75205600 75212800 Low Mappability +chr15 75298000 75299500 High Signal Region +chr15 75437000 75440500 High Signal Region +chr15 75523600 75529700 High Signal Region +chr15 76102000 76106500 High Signal Region +chr15 76559900 76577900 Low Mappability +chr15 76964600 76971400 Low Mappability +chr15 77336200 77439100 High Signal Region +chr15 77718300 77735600 Low Mappability +chr15 77895000 77934800 Low Mappability +chr15 79685000 79775700 Low Mappability +chr15 79869700 79892600 Low Mappability +chr15 79974400 79978400 Low Mappability +chr15 80232400 80267100 High Signal Region +chr15 81145400 81152000 Low Mappability +chr15 81492300 81523600 High Signal Region +chr15 82338000 82368000 Low Mappability +chr15 82590700 82608900 Low Mappability +chr15 82675500 82677200 High Signal Region +chr15 83172100 83202200 Low Mappability +chr15 84746600 84753000 Low Mappability +chr15 85176800 85196600 Low Mappability +chr15 85541200 85543100 High Signal Region +chr15 86193800 86196100 High Signal Region +chr15 86312100 86326400 Low Mappability +chr15 87293900 87301200 Low Mappability +chr15 87967000 87969000 High Signal Region +chr15 88779400 88783900 Low Mappability +chr15 88974800 88976800 High Signal Region +chr15 89597900 89621300 High Signal Region +chr15 89808500 89809700 High Signal Region +chr15 89943000 89982000 Low Mappability +chr15 90636400 90643600 Low Mappability +chr15 91115900 91134800 Low Mappability +chr15 91419400 91422200 High Signal Region +chr15 91720600 91723200 Low Mappability +chr15 91905900 91911200 High Signal Region +chr15 92470100 92475100 Low Mappability +chr15 92613700 92618300 Low Mappability +chr15 92722600 92730100 Low Mappability +chr15 92796100 92820000 Low Mappability +chr15 93044100 93062000 High Signal Region +chr15 93467800 
93469500 Low Mappability +chr15 93867100 93873600 High Signal Region +chr15 94088400 94124100 High Signal Region +chr15 94150500 94156800 High Signal Region +chr15 94373000 94379600 High Signal Region +chr15 95087600 95092100 High Signal Region +chr15 95306000 95312300 High Signal Region +chr15 95729500 95756400 High Signal Region +chr15 96551700 96559500 Low Mappability +chr15 96977900 96983600 Low Mappability +chr15 97082100 97084300 High Signal Region +chr15 97472900 97487400 Low Mappability +chr15 99168800 99171900 High Signal Region +chr15 99552100 99553900 Low Mappability +chr15 100331500 100339800 Low Mappability +chr15 100360000 100379700 Low Mappability +chr15 100541700 100617400 Low Mappability +chr15 101655700 101662100 High Signal Region +chr15 102596800 102603200 High Signal Region +chr15 103271900 103277100 High Signal Region +chr15 103406700 103418500 High Signal Region +chr15 103606700 103611400 High Signal Region +chr15 103814500 104043600 High Signal Region +chr16 0 3427800 High Signal Region +chr16 3450300 3519700 Low Mappability +chr16 4300400 4366800 Low Mappability +chr16 4585000 4591300 High Signal Region +chr16 5708200 5710200 High Signal Region +chr16 7460800 7463600 High Signal Region +chr16 7937100 7958400 Low Mappability +chr16 8256700 8286200 High Signal Region +chr16 9577100 9579600 Low Mappability +chr16 10631200 10633200 Low Mappability +chr16 10974100 11013900 High Signal Region +chr16 11134600 11145200 High Signal Region +chr16 11248000 11249900 Low Mappability +chr16 11679900 11687500 Low Mappability +chr16 12327300 12345900 Low Mappability +chr16 12417900 12423400 High Signal Region +chr16 12829200 12831000 High Signal Region +chr16 12976200 12981700 Low Mappability +chr16 13087700 13107000 Low Mappability +chr16 13903200 13925900 Low Mappability +chr16 14316200 14341200 Low Mappability +chr16 15502700 15510100 Low Mappability +chr16 15741400 15757700 Low Mappability +chr16 17199900 17236000 High Signal Region +chr16 17751400 
17761300 High Signal Region +chr16 17910400 17955500 High Signal Region +chr16 18532200 18534200 High Signal Region +chr16 18957500 18979200 High Signal Region +chr16 19334200 19375100 High Signal Region +chr16 19581200 19602400 Low Mappability +chr16 19711900 19748700 High Signal Region +chr16 19928600 19946300 Low Mappability +chr16 22923300 22929100 High Signal Region +chr16 26419300 26421200 High Signal Region +chr16 26808500 26814800 High Signal Region +chr16 27071900 27087600 High Signal Region +chr16 27212200 27218300 High Signal Region +chr16 28170600 28197500 High Signal Region +chr16 30828600 30830500 High Signal Region +chr16 31223800 31234300 Low Mappability +chr16 31339100 31358900 High Signal Region +chr16 31818700 31825200 Low Mappability +chr16 32147700 32153500 Low Mappability +chr16 32489700 32520100 Low Mappability +chr16 32579100 32598800 Low Mappability +chr16 33847200 33852600 Low Mappability +chr16 34581100 34591200 Low Mappability +chr16 34742000 34744000 High Signal Region +chr16 35980600 35983300 High Signal Region +chr16 36764900 36770500 Low Mappability +chr16 38714200 38721600 Low Mappability +chr16 39563700 39568200 High Signal Region +chr16 41270700 41273100 High Signal Region +chr16 42657300 42661200 High Signal Region +chr16 42773100 42779900 High Signal Region +chr16 42931600 42950000 High Signal Region +chr16 43764000 43771600 Low Mappability +chr16 44040400 44063900 Low Mappability +chr16 44709800 44726400 Low Mappability +chr16 44920200 44950700 Low Mappability +chr16 45292600 45293900 High Signal Region +chr16 45352100 45354000 High Signal Region +chr16 46364600 46369100 High Signal Region +chr16 47099100 47147300 High Signal Region +chr16 47552300 47564100 Low Mappability +chr16 48579900 48581300 Low Mappability +chr16 49024900 49031400 Low Mappability +chr16 49148400 49150300 Low Mappability +chr16 49447700 49489300 High Signal Region +chr16 50084900 50101400 Low Mappability +chr16 50909100 50926800 Low Mappability +chr16 
51087100 51094300 Low Mappability +chr16 51945800 51980200 High Signal Region +chr16 53412000 53428900 High Signal Region +chr16 53571500 53595400 Low Mappability +chr16 54298300 54307600 Low Mappability +chr16 54861600 54869000 High Signal Region +chr16 54959000 54965200 High Signal Region +chr16 55647800 55681600 Low Mappability +chr16 56038100 56065100 Low Mappability +chr16 56988400 57008400 High Signal Region +chr16 57085500 57095800 High Signal Region +chr16 57390200 57392600 High Signal Region +chr16 57792800 57811700 Low Mappability +chr16 58310800 58343000 High Signal Region +chr16 58632300 58670400 Low Mappability +chr16 59121800 59129100 Low Mappability +chr16 59310100 59378100 High Signal Region +chr16 60921200 60970900 High Signal Region +chr16 61312500 61325200 Low Mappability +chr16 62564300 62599200 High Signal Region +chr16 62875900 62880400 Low Mappability +chr16 63114300 63151200 High Signal Region +chr16 63301300 63313600 High Signal Region +chr16 64384600 64425600 High Signal Region +chr16 65176900 65181400 Low Mappability +chr16 66229300 66247600 Low Mappability +chr16 67328200 67334700 High Signal Region +chr16 68272300 68274300 High Signal Region +chr16 70542300 70558300 Low Mappability +chr16 70633900 70639700 Low Mappability +chr16 70892400 70898400 High Signal Region +chr16 70976900 70982900 High Signal Region +chr16 71687000 71691500 Low Mappability +chr16 72019300 72023900 Low Mappability +chr16 72056200 72062100 High Signal Region +chr16 72724800 72730900 Low Mappability +chr16 73656700 73688600 High Signal Region +chr16 74771800 74781500 Low Mappability +chr16 76057000 76065000 Low Mappability +chr16 76487100 76519600 High Signal Region +chr16 76988700 76991600 High Signal Region +chr16 77116900 77121900 Low Mappability +chr16 78977100 79013600 High Signal Region +chr16 79368600 79376000 Low Mappability +chr16 79782000 79786700 High Signal Region +chr16 79943000 79948600 Low Mappability +chr16 80269400 80309700 Low Mappability +chr16 
81071700 81079200 Low Mappability +chr16 81779900 81782000 High Signal Region +chr16 81859300 81865600 High Signal Region +chr16 82079700 82099600 High Signal Region +chr16 82237800 82243200 Low Mappability +chr16 82828200 82845600 High Signal Region +chr16 83077300 83081800 High Signal Region +chr16 83360600 83368000 Low Mappability +chr16 84260500 84283300 High Signal Region +chr16 84380600 84407600 High Signal Region +chr16 84440100 84446000 High Signal Region +chr16 85671600 85673000 High Signal Region +chr16 85713500 85720100 High Signal Region +chr16 86333000 86354300 High Signal Region +chr16 86539500 86570300 High Signal Region +chr16 86819800 86822100 High Signal Region +chr16 87055400 87060300 High Signal Region +chr16 87287400 87302500 Low Mappability +chr16 87372300 87391700 Low Mappability +chr16 88022900 88029900 High Signal Region +chr16 88790600 88797900 Low Mappability +chr16 88957900 88967800 High Signal Region +chr16 89145200 89196100 Low Mappability +chr16 89431800 89448400 Low Mappability +chr16 89636000 89642900 High Signal Region +chr16 89877500 89879700 High Signal Region +chr16 90056200 90072300 Low Mappability +chr16 90341200 90350100 Low Mappability +chr16 91533700 91551800 High Signal Region +chr16 92254500 92259400 Low Mappability +chr16 93581500 93622800 High Signal Region +chr16 93685800 93711200 High Signal Region +chr16 93785700 93790200 High Signal Region +chr16 93991400 93997900 High Signal Region +chr16 94258100 94282000 Low Mappability +chr16 95782000 95788900 High Signal Region +chr16 95991000 96010400 Low Mappability +chr16 97996400 98207700 High Signal Region +chr17 0 3039300 High Signal Region +chr17 3075400 3085400 High Signal Region +chr17 3378900 3380800 High Signal Region +chr17 5863900 5885100 High Signal Region +chr17 6219100 6717500 High Signal Region +chr17 6877300 7037900 High Signal Region +chr17 7302300 7430200 High Signal Region +chr17 7615300 7617200 High Signal Region +chr17 7950200 8052300 High Signal Region 
+chr17 11097900 11105100 High Signal Region +chr17 13018500 13469100 High Signal Region +chr17 13492200 13555800 High Signal Region +chr17 13584800 13656200 High Signal Region +chr17 14961200 15054300 Low Mappability +chr17 20859400 20865200 High Signal Region +chr17 23426600 23537000 High Signal Region +chr17 23730600 23732500 High Signal Region +chr17 24095300 24097300 High Signal Region +chr17 29101000 29109600 High Signal Region +chr17 31569500 31571400 High Signal Region +chr17 35367400 35480300 Low Mappability +chr17 36230300 36232500 High Signal Region +chr17 38498200 38500800 High Signal Region +chr17 39842000 39849700 High Signal Region +chr17 40422500 40427000 High Signal Region +chr17 50569500 50571400 High Signal Region +chr17 53034300 53056100 High Signal Region +chr17 53151500 53153500 High Signal Region +chr17 53807400 53820300 High Signal Region +chr17 54112300 54134200 High Signal Region +chr17 57368400 57399900 High Signal Region +chr17 62736600 62738500 High Signal Region +chr17 66798500 66800400 High Signal Region +chr17 67740400 67742500 High Signal Region +chr17 70962200 70964800 High Signal Region +chr17 82975900 82991600 High Signal Region +chr17 84458800 84464500 Low Mappability +chr17 85264100 85266000 High Signal Region +chr17 93017000 93047400 High Signal Region +chr17 93623500 93646700 High Signal Region +chr17 94886200 94987200 High Signal Region +chr18 0 3063700 High Signal Region +chr18 3085500 3142600 High Signal Region +chr18 3568100 3570100 Low Mappability +chr18 3619800 3652100 Low Mappability +chr18 3779700 3785600 High Signal Region +chr18 3815100 3819300 High Signal Region +chr18 3873200 3889000 High Signal Region +chr18 4194700 4199900 High Signal Region +chr18 4456700 4504600 High Signal Region +chr18 4658000 4664400 Low Mappability +chr18 4695200 4701800 Low Mappability +chr18 5499400 5502000 Low Mappability +chr18 5895900 5900400 Low Mappability +chr18 6043700 6046600 Low Mappability +chr18 6343100 6376400 Low Mappability 
+chr18 6663800 6669200 High Signal Region +chr18 6796200 6803600 Low Mappability +chr18 6853600 6868500 Low Mappability +chr18 7032800 7035500 High Signal Region +chr18 7527500 7534800 High Signal Region +chr18 7782300 7798400 High Signal Region +chr18 7998000 8018800 Low Mappability +chr18 8164900 8183000 High Signal Region +chr18 8243000 8271800 High Signal Region +chr18 8292000 8294000 Low Mappability +chr18 8721900 8747000 High Signal Region +chr18 9095200 9127300 High Signal Region +chr18 9248500 9269200 Low Mappability +chr18 9420000 9426100 High Signal Region +chr18 9890700 9915900 High Signal Region +chr18 11168900 11192100 High Signal Region +chr18 11247700 11293200 High Signal Region +chr18 11626000 11648000 Low Mappability +chr18 12945100 12956300 High Signal Region +chr18 13030000 13041900 High Signal Region +chr18 13161400 13180500 High Signal Region +chr18 13241200 13251100 Low Mappability +chr18 13296400 13300000 High Signal Region +chr18 13513200 13517200 High Signal Region +chr18 14732900 14739600 Low Mappability +chr18 15225500 15232800 High Signal Region +chr18 15366900 15382100 High Signal Region +chr18 15695100 15737600 High Signal Region +chr18 16283100 16288900 High Signal Region +chr18 16988600 17013600 Low Mappability +chr18 17116100 17119600 High Signal Region +chr18 17346100 17352400 High Signal Region +chr18 17425100 17480600 High Signal Region +chr18 17513300 17517900 High Signal Region +chr18 17541300 17559000 High Signal Region +chr18 17593300 17598500 High Signal Region +chr18 17938300 17951600 Low Mappability +chr18 18816600 18823800 High Signal Region +chr18 18916300 18917900 High Signal Region +chr18 18976900 18992400 High Signal Region +chr18 19240600 19289100 High Signal Region +chr18 19345800 19352600 Low Mappability +chr18 19430400 19448100 High Signal Region +chr18 19679600 19681600 Low Mappability +chr18 19812100 19836500 High Signal Region +chr18 20352500 20369800 High Signal Region +chr18 20896200 20910000 Low Mappability 
+chr18 21261800 21268900 Low Mappability +chr18 21528200 21541600 High Signal Region +chr18 21943200 21945200 Low Mappability +chr18 22297400 22304000 High Signal Region +chr18 23186200 23215300 High Signal Region +chr18 25045100 25047300 High Signal Region +chr18 25253000 25259500 High Signal Region +chr18 25905600 25928600 High Signal Region +chr18 26003000 26008100 Low Mappability +chr18 26829800 26837100 Low Mappability +chr18 26998200 27005600 Low Mappability +chr18 27062000 27068200 High Signal Region +chr18 28151300 28167300 High Signal Region +chr18 28441700 28446600 Low Mappability +chr18 28482900 28484900 High Signal Region +chr18 28814100 28816900 High Signal Region +chr18 28960100 28966000 Low Mappability +chr18 29014700 29022000 High Signal Region +chr18 29557800 29559800 High Signal Region +chr18 29713000 29719200 High Signal Region +chr18 31281100 31294300 High Signal Region +chr18 32758400 32793400 High Signal Region +chr18 33212800 33221500 Low Mappability +chr18 33275100 33331000 High Signal Region +chr18 33697400 33722600 Low Mappability +chr18 34083600 34087300 Low Mappability +chr18 34397100 34409800 Low Mappability +chr18 35318500 35320400 Low Mappability +chr18 36454200 36494600 Low Mappability +chr18 36981500 36988700 Low Mappability +chr18 37031800 37045800 High Signal Region +chr18 37364600 37398900 Low Mappability +chr18 37545500 37645000 High Signal Region +chr18 39598600 39604900 High Signal Region +chr18 40306300 40309300 High Signal Region +chr18 40708500 40713600 Low Mappability +chr18 41381600 41387500 High Signal Region +chr18 41465300 41471500 High Signal Region +chr18 41820100 41826100 High Signal Region +chr18 41960600 41966100 High Signal Region +chr18 42556800 42559800 High Signal Region +chr18 42913000 42914900 High Signal Region +chr18 43335500 43337900 High Signal Region +chr18 43889500 43900400 High Signal Region +chr18 44033600 44050200 High Signal Region +chr18 44228000 44263100 High Signal Region +chr18 44291600 
44295600 High Signal Region +chr18 44361600 44380500 High Signal Region +chr18 44873100 44875100 Low Mappability +chr18 44981000 45032700 High Signal Region +chr18 45131400 45133400 High Signal Region +chr18 45291700 45314300 Low Mappability +chr18 45357300 45364700 Low Mappability +chr18 45392200 45397700 High Signal Region +chr18 45506800 45513400 High Signal Region +chr18 45998300 46038000 Low Mappability +chr18 46082000 46101400 High Signal Region +chr18 46439100 46444100 Low Mappability +chr18 46791400 46793400 Low Mappability +chr18 47648600 47654100 Low Mappability +chr18 47769900 47783100 Low Mappability +chr18 48009500 48011400 High Signal Region +chr18 48208100 48220300 High Signal Region +chr18 48705800 48713100 Low Mappability +chr18 48831300 48836100 High Signal Region +chr18 49387700 49397800 High Signal Region +chr18 49669200 49695600 High Signal Region +chr18 50253400 50268700 High Signal Region +chr18 50632100 50700200 Low Mappability +chr18 51072000 51077600 Low Mappability +chr18 51658600 51698300 High Signal Region +chr18 52020200 52059300 High Signal Region +chr18 52256200 52262200 High Signal Region +chr18 52378900 52395000 Low Mappability +chr18 52876200 52883200 High Signal Region +chr18 53828800 53839900 Low Mappability +chr18 53869300 53876600 Low Mappability +chr18 54023900 54030000 High Signal Region +chr18 54288100 54335900 Low Mappability +chr18 54698000 54707800 High Signal Region +chr18 55222400 55224400 Low Mappability +chr18 55311000 55321100 Low Mappability +chr18 55414800 55436200 Low Mappability +chr18 55899800 55901700 High Signal Region +chr18 55938500 55954100 High Signal Region +chr18 56273000 56276900 High Signal Region +chr18 56302600 56304500 High Signal Region +chr18 56341200 56346000 High Signal Region +chr18 56826900 56830200 Low Mappability +chr18 57560400 57562500 Low Mappability +chr18 58992700 58999300 Low Mappability +chr18 59496300 59511000 High Signal Region +chr18 59929900 59955000 High Signal Region +chr18 
60042400 60044400 Low Mappability +chr18 60206100 60238100 High Signal Region +chr18 60525200 60533800 Low Mappability +chr18 62237400 62247700 High Signal Region +chr18 62273700 62292800 Low Mappability +chr18 62752700 62755100 High Signal Region +chr18 64131300 64132600 High Signal Region +chr18 64448400 64454900 Low Mappability +chr18 65103100 65105000 High Signal Region +chr18 65385700 65405100 Low Mappability +chr18 65492400 65494700 Low Mappability +chr18 65716300 65719400 Low Mappability +chr18 66543200 66548900 High Signal Region +chr18 66750000 66759900 Low Mappability +chr18 66881200 66887200 High Signal Region +chr18 68381300 68387800 High Signal Region +chr18 68412100 68425800 Low Mappability +chr18 68461300 68489000 High Signal Region +chr18 68691100 68693200 High Signal Region +chr18 69759300 69761300 Low Mappability +chr18 70489500 70515400 High Signal Region +chr18 70775600 70791900 High Signal Region +chr18 70842100 70849200 Low Mappability +chr18 71032500 71038800 High Signal Region +chr18 71139200 71145200 High Signal Region +chr18 71208200 71211300 Low Mappability +chr18 71267000 71273300 Low Mappability +chr18 71630400 71641100 Low Mappability +chr18 72753900 72794900 High Signal Region +chr18 72987900 72991000 High Signal Region +chr18 73259600 73264100 Low Mappability +chr18 74553100 74566400 High Signal Region +chr18 74745500 74758500 Low Mappability +chr18 74880300 74882000 High Signal Region +chr18 76177900 76184300 Low Mappability +chr18 76579700 76586300 Low Mappability +chr18 77264400 77271000 High Signal Region +chr18 78197300 78199300 High Signal Region +chr18 78407800 78428500 Low Mappability +chr18 78861400 78867900 High Signal Region +chr18 80021700 80028900 Low Mappability +chr18 80307500 80309600 Low Mappability +chr18 80455500 80518400 Low Mappability +chr18 81299700 81306200 Low Mappability +chr18 82052100 82058200 High Signal Region +chr18 82160100 82227800 High Signal Region +chr18 82319500 82339900 High Signal Region +chr18 
82692900 82717900 Low Mappability +chr18 83171100 83178400 Low Mappability +chr18 83700500 83707900 Low Mappability +chr18 84828700 84833000 High Signal Region +chr18 85035000 85080600 High Signal Region +chr18 85105800 85112200 High Signal Region +chr18 85169900 85175900 High Signal Region +chr18 85377800 85382800 Low Mappability +chr18 85697000 85699200 High Signal Region +chr18 85783600 85789900 High Signal Region +chr18 86508300 86510200 High Signal Region +chr18 86560600 86586100 High Signal Region +chr18 86828500 86849500 High Signal Region +chr18 87006300 87009800 High Signal Region +chr18 87141500 87161200 High Signal Region +chr18 87568300 87574300 High Signal Region +chr18 88149300 88155400 High Signal Region +chr18 89030400 89036400 High Signal Region +chr18 89615900 89650500 Low Mappability +chr18 89983200 89989700 Low Mappability +chr18 90055500 90092500 High Signal Region +chr18 90113400 90125400 Low Mappability +chr18 90464100 90501300 High Signal Region +chr18 90601200 90702600 High Signal Region +chr19 0 3140800 High Signal Region +chr19 3161400 3248600 High Signal Region +chr19 4061100 4066400 Low Mappability +chr19 6581000 6594300 High Signal Region +chr19 7713600 7774800 High Signal Region +chr19 7810700 7843900 Low Mappability +chr19 8203200 8285500 Low Mappability +chr19 9250500 9357700 High Signal Region +chr19 9502000 9565000 Low Mappability +chr19 9745800 9803300 High Signal Region +chr19 9823500 9837700 High Signal Region +chr19 10507900 10510300 High Signal Region +chr19 10954500 10960300 Low Mappability +chr19 11199700 11239800 High Signal Region +chr19 12447200 12454600 Low Mappability +chr19 13203500 13216400 High Signal Region +chr19 13330600 13357100 High Signal Region +chr19 13685000 13693300 High Signal Region +chr19 13760500 13777200 High Signal Region +chr19 15256700 15263000 High Signal Region +chr19 15433400 15438100 High Signal Region +chr19 15711800 15719800 High Signal Region +chr19 15839200 15846600 High Signal Region 
+chr19 15956500 15958500 Low Mappability +chr19 16670500 16673100 High Signal Region +chr19 18358000 18364200 High Signal Region +chr19 18532700 18535600 High Signal Region +chr19 19132200 19161200 High Signal Region +chr19 19509000 19514900 High Signal Region +chr19 19870300 19876900 Low Mappability +chr19 20080700 20081800 High Signal Region +chr19 20140700 20144100 Low Mappability +chr19 20288200 20297900 Low Mappability +chr19 20455400 20462700 Low Mappability +chr19 20839700 20843900 Low Mappability +chr19 21218200 21243800 High Signal Region +chr19 21532400 21534400 Low Mappability +chr19 22644100 22651700 High Signal Region +chr19 22722400 22728400 Low Mappability +chr19 23356500 23358400 High Signal Region +chr19 23739200 23754000 High Signal Region +chr19 24040300 24042300 Low Mappability +chr19 24911900 24919200 High Signal Region +chr19 25741800 25770100 High Signal Region +chr19 25917500 25920000 High Signal Region +chr19 27751400 27758100 High Signal Region +chr19 28149600 28156600 High Signal Region +chr19 30907400 30908700 High Signal Region +chr19 30963600 30968000 Low Mappability +chr19 31722800 31735800 High Signal Region +chr19 32203200 32211600 Low Mappability +chr19 32441800 32449100 Low Mappability +chr19 32822000 32824000 Low Mappability +chr19 33439100 33446100 Low Mappability +chr19 33864200 33877900 High Signal Region +chr19 33949100 33958200 High Signal Region +chr19 34131200 34161200 Low Mappability +chr19 34581900 34613000 High Signal Region +chr19 35076400 35079800 High Signal Region +chr19 35650200 35673500 High Signal Region +chr19 36702500 36723400 High Signal Region +chr19 37298800 37301800 Low Mappability +chr19 37617300 37624600 Low Mappability +chr19 38490200 38495300 Low Mappability +chr19 39078100 39079500 High Signal Region +chr19 39106700 39156300 High Signal Region +chr19 39244700 39270400 High Signal Region +chr19 39331700 39424100 High Signal Region +chr19 39599900 39607200 Low Mappability +chr19 39658700 39695100 Low 
Mappability +chr19 40020400 40026800 Low Mappability +chr19 40094100 40153300 High Signal Region +chr19 40328500 40330000 Low Mappability +chr19 41142700 41150000 Low Mappability +chr19 41424200 41473100 Low Mappability +chr19 42346000 42350500 Low Mappability +chr19 42647600 42649700 Low Mappability +chr19 43118800 43124600 High Signal Region +chr19 43236000 43238000 Low Mappability +chr19 43321500 43323700 High Signal Region +chr19 44145700 44171700 Low Mappability +chr19 44218500 44225000 Low Mappability +chr19 44862100 44864300 High Signal Region +chr19 45004900 45096500 Low Mappability +chr19 45182300 45190200 High Signal Region +chr19 45649000 45661500 High Signal Region +chr19 45699400 45706300 Low Mappability +chr19 47590300 47602700 Low Mappability +chr19 48484600 48496700 High Signal Region +chr19 48743800 48746300 High Signal Region +chr19 50107900 50114400 Low Mappability +chr19 50309700 50311600 High Signal Region +chr19 50754100 50755900 Low Mappability +chr19 50828900 50835600 High Signal Region +chr19 51649700 51655800 High Signal Region +chr19 51949000 51955700 Low Mappability +chr19 52303100 52309700 Low Mappability +chr19 52927900 52932300 Low Mappability +chr19 52967800 52991100 Low Mappability +chr19 53522200 53527100 High Signal Region +chr19 53767900 53777800 High Signal Region +chr19 54235200 54236600 High Signal Region +chr19 54884700 54936800 High Signal Region +chr19 54994900 55001700 Low Mappability +chr19 55976700 55984000 Low Mappability +chr19 56248700 56259000 Low Mappability +chr19 56846600 56849100 High Signal Region +chr19 57514200 57520700 Low Mappability +chr19 57634000 57635600 Low Mappability +chr19 57827000 57832700 Low Mappability +chr19 58012500 58014600 Low Mappability +chr19 58112400 58114500 High Signal Region +chr19 58481300 58483200 High Signal Region +chr19 59221800 59240400 High Signal Region +chr19 59763100 59779900 High Signal Region +chr19 60082500 60089900 High Signal Region +chr19 60906900 60934000 High Signal 
Region +chr19 61162600 61174300 Low Mappability +chr19 61197700 61268100 High Signal Region +chr19 61330300 61431500 High Signal Region +chr1 8628600 8719100 High Signal Region +chr1 12038300 12041400 High Signal Region +chr1 14958600 14992600 High Signal Region +chr1 17466800 17479900 High Signal Region +chr1 18872500 18901300 High Signal Region +chr1 19175300 19177200 High Signal Region +chr1 22555000 22556900 High Signal Region +chr1 24610600 24617100 High Signal Region +chr1 24683100 24685100 High Signal Region +chr1 26685100 26689200 High Signal Region +chr1 43776800 43779800 High Signal Region +chr1 44198000 44202200 High Signal Region +chr1 46701700 46756600 High Signal Region +chr1 48880600 48882500 High Signal Region +chr1 56119600 56143500 High Signal Region +chr1 56772200 56783300 High Signal Region +chr1 58613000 58614900 High Signal Region +chr1 63629100 63631600 High Signal Region +chr1 69455800 69457800 High Signal Region +chr1 71078400 71085500 High Signal Region +chr1 71250600 71256700 High Signal Region +chr1 73549100 73555300 High Signal Region +chr1 73832600 73902400 High Signal Region +chr1 78572900 78575400 High Signal Region +chr1 84953500 85663200 High Signal Region +chr1 88209400 88311700 High Signal Region +chr1 94093800 94109400 High Signal Region +chr1 95451000 95452900 High Signal Region +chr1 95783900 95789700 High Signal Region +chr1 95810200 95851700 High Signal Region +chr1 100737900 100760500 High Signal Region +chr1 101040100 101046300 High Signal Region +chr1 102627300 102644300 High Signal Region +chr1 105226800 105230700 High Signal Region +chr1 110170400 110188300 High Signal Region +chr1 113602700 113604800 High Signal Region +chr1 114557300 114579100 High Signal Region +chr1 114643300 114660500 High Signal Region +chr1 115447500 115482800 High Signal Region +chr1 122356200 122358200 High Signal Region +chr1 133593600 133611300 High Signal Region +chr1 142651800 142672300 High Signal Region +chr1 145444500 145449100 High 
Signal Region +chr1 146120600 146128200 High Signal Region +chr1 151181600 151212000 High Signal Region +chr1 165862800 165864700 Low Mappability +chr1 171033000 171112400 High Signal Region +chr1 172716800 172738200 High Signal Region +chr1 172878700 172885100 High Signal Region +chr1 178538700 178540700 High Signal Region +chr1 181742100 181752400 High Signal Region +chr1 182628900 182630800 High Signal Region +chr1 183298200 183300500 High Signal Region +chr1 190299400 190304600 High Signal Region +chr1 192453100 192471800 High Signal Region +chr1 193226900 193228800 High Signal Region +chr1 195239800 195257400 High Signal Region +chr1 195278100 195280200 High Signal Region +chr1 195320700 195471900 High Signal Region +chr2 0 3086300 High Signal Region +chr2 3474900 3488800 High Signal Region +chr2 3932700 3939100 Low Mappability +chr2 3963500 3986100 High Signal Region +chr2 4515100 4518600 High Signal Region +chr2 4600600 4620300 High Signal Region +chr2 5378100 5394600 High Signal Region +chr2 5545900 5561600 High Signal Region +chr2 6078200 6095300 High Signal Region +chr2 6773100 6777500 Low Mappability +chr2 6832200 6846700 High Signal Region +chr2 7137500 7139600 High Signal Region +chr2 7404000 7458100 High Signal Region +chr2 7571700 7609800 High Signal Region +chr2 7656300 7669700 Low Mappability +chr2 7752800 7758500 High Signal Region +chr2 8034600 8042900 High Signal Region +chr2 8266200 8275600 High Signal Region +chr2 8528400 8535700 High Signal Region +chr2 8938000 8940500 High Signal Region +chr2 9212600 9219300 High Signal Region +chr2 10177100 10183400 Low Mappability +chr2 10483200 10501500 Low Mappability +chr2 10677000 10697600 Low Mappability +chr2 12605500 12668600 High Signal Region +chr2 13824000 13869200 High Signal Region +chr2 13946300 13948900 High Signal Region +chr2 14014100 14035300 High Signal Region +chr2 14359100 14386600 High Signal Region +chr2 14919000 14924500 High Signal Region +chr2 15301300 15334700 High Signal Region 
+chr2 15430100 15435500 Low Mappability +chr2 15575900 15602800 High Signal Region +chr2 15716700 15721100 High Signal Region +chr2 15768300 15770500 High Signal Region +chr2 16192400 16198500 High Signal Region +chr2 16320200 16326500 Low Mappability +chr2 16762800 16787000 High Signal Region +chr2 17383200 17385100 High Signal Region +chr2 17612500 17654500 Low Mappability +chr2 17747200 17753000 High Signal Region +chr2 19209900 19212900 High Signal Region +chr2 19498400 19510300 High Signal Region +chr2 19707900 19712200 High Signal Region +chr2 20038500 20067400 Low Mappability +chr2 20426800 20433300 Low Mappability +chr2 20898900 20901100 High Signal Region +chr2 21062600 21082200 Low Mappability +chr2 22049700 22087700 High Signal Region +chr2 22137300 22165500 High Signal Region +chr2 22389900 22608700 High Signal Region +chr2 22737300 22745800 High Signal Region +chr2 23009600 23015000 Low Mappability +chr2 23274600 23304900 High Signal Region +chr2 23693700 23707900 High Signal Region +chr2 24193300 24199000 High Signal Region +chr2 26333100 26351900 Low Mappability +chr2 26759100 26763600 High Signal Region +chr2 26998200 27004400 Low Mappability +chr2 28183200 28205000 High Signal Region +chr2 30204600 30239600 Low Mappability +chr2 32381300 32488200 Low Mappability +chr2 33933000 33935300 High Signal Region +chr2 34049900 34051800 High Signal Region +chr2 34903900 34935900 Low Mappability +chr2 35090800 35109900 High Signal Region +chr2 35505000 35526700 Low Mappability +chr2 36008600 36019300 Low Mappability +chr2 36401900 36413100 High Signal Region +chr2 36508600 36515200 High Signal Region +chr2 36542800 36549100 High Signal Region +chr2 36761000 36766500 High Signal Region +chr2 36951900 36970700 High Signal Region +chr2 37156900 37185900 High Signal Region +chr2 37339700 37359400 Low Mappability +chr2 38564700 38566600 Low Mappability +chr2 39225400 39293200 High Signal Region +chr2 39360600 39367900 Low Mappability +chr2 39517800 39534800 High 
Signal Region +chr2 39778500 39785700 Low Mappability +chr2 39887500 39915800 High Signal Region +chr2 40131200 40240800 High Signal Region +chr2 40262500 40268600 High Signal Region +chr2 40766400 40794000 High Signal Region +chr2 41059500 41070200 Low Mappability +chr2 41168700 41171400 High Signal Region +chr2 41692800 41694800 High Signal Region +chr2 41744300 41751600 Low Mappability +chr2 41775100 41781500 High Signal Region +chr2 41895300 41897200 High Signal Region +chr2 42044500 42051600 High Signal Region +chr2 42200300 42240700 High Signal Region +chr2 42950100 42956600 High Signal Region +chr2 43347900 43356400 High Signal Region +chr2 44936600 44942400 High Signal Region +chr2 46224800 46226700 High Signal Region +chr2 46343100 46348100 Low Mappability +chr2 46574200 46579600 Low Mappability +chr2 47008600 47023500 High Signal Region +chr2 47196300 47199300 High Signal Region +chr2 47533600 47642600 High Signal Region +chr2 47942200 47943800 High Signal Region +chr2 48483000 48491000 Low Mappability +chr2 50543200 50545500 High Signal Region +chr2 50679600 50686800 Low Mappability +chr2 51552600 51555600 High Signal Region +chr2 51750900 51756000 High Signal Region +chr2 51881600 51890600 Low Mappability +chr2 51945900 51948400 High Signal Region +chr2 52695900 52718600 High Signal Region +chr2 52786800 52796300 High Signal Region +chr2 53317700 53321600 Low Mappability +chr2 53347800 53367000 High Signal Region +chr2 53633400 53642900 High Signal Region +chr2 53745700 53799800 High Signal Region +chr2 54252600 54258500 High Signal Region +chr2 54698000 54747900 High Signal Region +chr2 54862600 54895300 High Signal Region +chr2 55197500 55216400 High Signal Region +chr2 55308300 55353700 High Signal Region +chr2 55823800 55829000 High Signal Region +chr2 55860200 55874300 Low Mappability +chr2 55942000 55947800 High Signal Region +chr2 56192800 56194600 High Signal Region +chr2 56298700 56304900 High Signal Region +chr2 56465200 56471900 High Signal 
Region +chr2 56834300 56879100 High Signal Region +chr2 56988500 56990600 Low Mappability +chr2 57166400 57172900 Low Mappability +chr2 57214400 57223500 Low Mappability +chr2 57417400 57446500 High Signal Region +chr2 57628500 57633800 High Signal Region +chr2 57726600 57728500 High Signal Region +chr2 58212900 58263100 High Signal Region +chr2 58648300 58691900 High Signal Region +chr2 58881200 58902500 High Signal Region +chr2 59971300 59972800 Low Mappability +chr2 61038200 61042700 High Signal Region +chr2 61959600 61965300 High Signal Region +chr2 62022900 62040100 High Signal Region +chr2 62861100 62867200 High Signal Region +chr2 63297300 63302700 Low Mappability +chr2 63368100 63403900 High Signal Region +chr2 63462300 63483800 High Signal Region +chr2 63641200 63654600 High Signal Region +chr2 63718200 63725400 High Signal Region +chr2 63838100 63845300 Low Mappability +chr2 64309200 64319600 High Signal Region +chr2 64608400 64633400 Low Mappability +chr2 64698700 64703300 High Signal Region +chr2 65592500 65602200 High Signal Region +chr2 65737700 65781500 Low Mappability +chr2 66721600 66750400 High Signal Region +chr2 66845100 66852300 High Signal Region +chr2 67408400 67414500 High Signal Region +chr2 67939700 67946000 High Signal Region +chr2 68770400 68776700 High Signal Region +chr2 68917800 68924100 Low Mappability +chr2 69353900 69356600 High Signal Region +chr2 70263100 70270000 Low Mappability +chr2 70880100 70892900 High Signal Region +chr2 71054700 71071300 Low Mappability +chr2 71942000 71949500 Low Mappability +chr2 72270200 72275700 Low Mappability +chr2 73867000 73868900 High Signal Region +chr2 74364300 74402600 Low Mappability +chr2 74437600 74444900 Low Mappability +chr2 75499500 75504600 High Signal Region +chr2 77224000 77230500 Low Mappability +chr2 78318000 78339500 High Signal Region +chr2 79437700 79441900 High Signal Region +chr2 79936500 79943700 High Signal Region +chr2 80119000 80121500 High Signal Region +chr2 80220600 
80257700 Low Mappability +chr2 80795600 80838700 High Signal Region +chr2 80879000 80880200 High Signal Region +chr2 80956500 81006000 High Signal Region +chr2 81069000 81075100 High Signal Region +chr2 81639400 81644800 High Signal Region +chr2 81750800 81756800 High Signal Region +chr2 81790000 81795900 High Signal Region +chr2 82329800 82340100 High Signal Region +chr2 82673800 82679900 High Signal Region +chr2 82714300 82728500 High Signal Region +chr2 82783900 82789500 High Signal Region +chr2 82868800 82887900 High Signal Region +chr2 82916300 82936800 High Signal Region +chr2 83120100 83146100 High Signal Region +chr2 83185100 83193200 High Signal Region +chr2 83325900 83328200 High Signal Region +chr2 83413500 83587500 High Signal Region +chr2 83865600 83893100 High Signal Region +chr2 83931600 83995800 Low Mappability +chr2 84080900 84085600 High Signal Region +chr2 84505000 84510500 Low Mappability +chr2 84532500 84534600 Low Mappability +chr2 84564800 84576000 Low Mappability +chr2 85685600 85701800 Low Mappability +chr2 85874000 85896300 High Signal Region +chr2 86018200 86021700 Low Mappability +chr2 86303400 86317700 High Signal Region +chr2 86339600 86346900 Low Mappability +chr2 86612700 86617500 High Signal Region +chr2 87381000 87382800 High Signal Region +chr2 87875700 87941300 High Signal Region +chr2 88167400 88212600 High Signal Region +chr2 88776200 88780800 High Signal Region +chr2 89206600 89277100 Low Mappability +chr2 89345700 89350400 High Signal Region +chr2 89761200 89775100 High Signal Region +chr2 89856400 89920100 High Signal Region +chr2 90127200 90132700 High Signal Region +chr2 90157100 90249100 High Signal Region +chr2 90273200 90279100 High Signal Region +chr2 90309300 90396100 High Signal Region +chr2 92092600 92094700 High Signal Region +chr2 92167200 92169100 High Signal Region +chr2 93824700 93850200 High Signal Region +chr2 94602800 94607800 Low Mappability +chr2 94633900 94656500 High Signal Region +chr2 94801000 94809400 
Low Mappability +chr2 94852800 94891200 High Signal Region +chr2 95064700 95093500 Low Mappability +chr2 95148000 95167800 High Signal Region +chr2 95215900 95320600 High Signal Region +chr2 95414700 95420600 High Signal Region +chr2 95536400 95538400 Low Mappability +chr2 95647900 95654300 High Signal Region +chr2 95794500 95799200 High Signal Region +chr2 95929300 95934400 High Signal Region +chr2 96191400 96208900 High Signal Region +chr2 96547800 96566800 Low Mappability +chr2 96954700 96977300 High Signal Region +chr2 97021000 97034600 High Signal Region +chr2 97308000 97327600 High Signal Region +chr2 97671600 97686300 High Signal Region +chr2 97760700 97765800 High Signal Region +chr2 97872400 97958200 High Signal Region +chr2 98361700 98449600 High Signal Region +chr2 98659400 98668200 High Signal Region +chr2 98796500 98801900 High Signal Region +chr2 99020000 99057500 High Signal Region +chr2 99300200 99320300 High Signal Region +chr2 99944600 99970200 High Signal Region +chr2 100112000 100114300 High Signal Region +chr2 100223900 100238300 High Signal Region +chr2 100418400 100777900 Low Mappability +chr2 101127200 101153600 Low Mappability +chr2 101313100 101350600 High Signal Region +chr2 102828400 102830400 High Signal Region +chr2 103231300 103232300 High Signal Region +chr2 103852300 103872800 High Signal Region +chr2 104684900 104697300 High Signal Region +chr2 105249300 105259000 High Signal Region +chr2 105539300 105563200 Low Mappability +chr2 105825900 105865100 High Signal Region +chr2 106555100 106569300 High Signal Region +chr2 107134100 107140900 High Signal Region +chr2 107593900 107601200 Low Mappability +chr2 107710100 107712400 High Signal Region +chr2 108608600 108614000 High Signal Region +chr2 108945100 108972800 High Signal Region +chr2 109629400 109636000 High Signal Region +chr2 110016800 110025500 High Signal Region +chr2 110091100 110128700 High Signal Region +chr2 110157100 110163300 High Signal Region +chr2 110292700 110294600 
High Signal Region +chr2 110545800 110583400 High Signal Region +chr2 110752400 110780100 High Signal Region +chr2 111007400 111018600 High Signal Region +chr2 111042000 111046600 High Signal Region +chr2 111172700 111179800 High Signal Region +chr2 111281500 111287900 Low Mappability +chr2 111545600 111553300 Low Mappability +chr2 111716900 111722900 High Signal Region +chr2 111844900 111866400 High Signal Region +chr2 111890900 111898900 High Signal Region +chr2 112053900 112086000 High Signal Region +chr2 112319700 112326200 Low Mappability +chr2 112522900 112570500 High Signal Region +chr2 112602800 112605100 High Signal Region +chr2 112701400 112707900 High Signal Region +chr2 113095800 113102400 Low Mappability +chr2 113330900 113333000 Low Mappability +chr2 113518400 113524900 Low Mappability +chr2 113564300 113565700 High Signal Region +chr2 113659300 113673200 High Signal Region +chr2 114180800 114187400 Low Mappability +chr2 114242400 114244000 High Signal Region +chr2 114469200 114504000 High Signal Region +chr2 116454300 116524000 High Signal Region +chr2 117829600 117835500 High Signal Region +chr2 118017700 118020200 High Signal Region +chr2 120608600 120650200 High Signal Region +chr2 120810300 120821000 High Signal Region +chr2 121435600 121523600 High Signal Region +chr2 121938800 121957600 High Signal Region +chr2 122680400 122683200 High Signal Region +chr2 123288000 123294300 Low Mappability +chr2 123496800 123525300 High Signal Region +chr2 123785200 123790700 High Signal Region +chr2 124002700 124004600 High Signal Region +chr2 124798800 124835800 High Signal Region +chr2 125625000 125635900 Low Mappability +chr2 126217400 126263800 High Signal Region +chr2 126445400 126447400 Low Mappability +chr2 126964900 126972100 Low Mappability +chr2 127720400 127734000 Low Mappability +chr2 128050800 128053200 High Signal Region +chr2 128480400 128486900 Low Mappability +chr2 128772500 128774500 Low Mappability +chr2 129499400 129523400 High Signal 
Region +chr2 129602700 129613700 Low Mappability +chr2 131791800 131793800 High Signal Region +chr2 131908300 131931100 Low Mappability +chr2 131963900 131983700 High Signal Region +chr2 132885700 132890400 High Signal Region +chr2 132952400 132954500 Low Mappability +chr2 133053200 133083400 High Signal Region +chr2 133239300 133261800 High Signal Region +chr2 133934000 133937500 High Signal Region +chr2 134560100 134577900 High Signal Region +chr2 134661800 134673000 High Signal Region +chr2 134746600 134751100 High Signal Region +chr2 135146800 135151900 High Signal Region +chr2 135987600 135989700 High Signal Region +chr2 136234300 136286800 Low Mappability +chr2 137028200 137037000 High Signal Region +chr2 137345900 137369900 High Signal Region +chr2 137394500 137405600 High Signal Region +chr2 137640000 137642300 High Signal Region +chr2 137890200 137895000 High Signal Region +chr2 138035000 138056400 Low Mappability +chr2 138573700 138580400 High Signal Region +chr2 138621500 138624200 High Signal Region +chr2 138833600 138853100 High Signal Region +chr2 138904300 138935000 High Signal Region +chr2 139433200 139476200 High Signal Region +chr2 140345800 140352400 Low Mappability +chr2 142197000 142204400 Low Mappability +chr2 142464200 142483300 Low Mappability +chr2 142789100 142795600 Low Mappability +chr2 143275500 143290300 High Signal Region +chr2 143725900 143764700 High Signal Region +chr2 144627800 144636700 Low Mappability +chr2 144975200 144977100 High Signal Region +chr2 145001300 145003200 High Signal Region +chr2 145118300 145146300 Low Mappability +chr2 145236800 145242600 Low Mappability +chr2 145625100 145630800 Low Mappability +chr2 145732700 145734600 High Signal Region +chr2 146135700 146176900 High Signal Region +chr2 146995700 147013200 Low Mappability +chr2 147675300 147677500 High Signal Region +chr2 147864800 147871300 High Signal Region +chr2 147918800 147925100 Low Mappability +chr2 148410500 148416000 Low Mappability +chr2 148459900 
148473800 High Signal Region +chr2 148612700 148620200 Low Mappability +chr2 148939300 148984200 High Signal Region +chr2 149049800 149056000 High Signal Region +chr2 149269400 149292700 High Signal Region +chr2 150413500 150452500 High Signal Region +chr2 150728300 150749700 Low Mappability +chr2 151029700 151385300 High Signal Region +chr2 151408800 151496700 High Signal Region +chr2 152157000 152159000 Low Mappability +chr2 152206800 152227500 High Signal Region +chr2 152263400 152269900 Low Mappability +chr2 153674800 153693100 Low Mappability +chr2 154174200 154180000 High Signal Region +chr2 154353800 154359700 Low Mappability +chr2 155016300 155051500 High Signal Region +chr2 155235400 155258100 High Signal Region +chr2 156185100 156214400 Low Mappability +chr2 157566000 157655300 Low Mappability +chr2 157833200 157835600 High Signal Region +chr2 158286300 158292800 High Signal Region +chr2 159455200 159469500 High Signal Region +chr2 160620300 160638500 High Signal Region +chr2 161368800 161376200 High Signal Region +chr2 161984900 161990900 High Signal Region +chr2 162369100 162376700 High Signal Region +chr2 162594500 162602700 High Signal Region +chr2 162843800 162847600 High Signal Region +chr2 163519100 163533100 Low Mappability +chr2 163644500 163655100 High Signal Region +chr2 163788900 163796100 Low Mappability +chr2 163833800 163849200 Low Mappability +chr2 163958100 163963000 Low Mappability +chr2 164201000 164202700 High Signal Region +chr2 165477300 165529900 Low Mappability +chr2 165675100 165679500 Low Mappability +chr2 165848700 165953000 Low Mappability +chr2 166530600 166535100 Low Mappability +chr2 166780500 166832200 Low Mappability +chr2 167269400 167291100 High Signal Region +chr2 167407900 167423000 Low Mappability +chr2 170315100 170320000 High Signal Region +chr2 170503800 170509800 High Signal Region +chr2 171814300 171816700 High Signal Region +chr2 171912800 171932200 Low Mappability +chr2 172007100 172014300 High Signal Region 
+chr2 172743600 172751100 Low Mappability +chr2 173098700 173101000 Low Mappability +chr2 173706700 173708800 High Signal Region +chr2 174961800 176745500 High Signal Region +chr2 176767100 177166600 High Signal Region +chr2 177232400 177490200 High Signal Region +chr2 177526700 177841000 High Signal Region +chr2 178775000 178794400 High Signal Region +chr2 180025600 180093500 Low Mappability +chr2 181169900 181188000 Low Mappability +chr2 181285900 181298800 High Signal Region +chr2 181739800 181745800 High Signal Region +chr2 181885000 181933400 High Signal Region +chr2 182003800 182113200 High Signal Region +chr3 0 3052500 High Signal Region +chr3 3084100 3098300 High Signal Region +chr3 3123200 3150800 High Signal Region +chr3 3443300 3493700 High Signal Region +chr3 4698100 4725500 High Signal Region +chr3 5517700 5525000 Low Mappability +chr3 5859400 5863500 High Signal Region +chr3 6115100 6117100 High Signal Region +chr3 6601900 6627400 High Signal Region +chr3 6900700 6916400 High Signal Region +chr3 6941100 6946600 High Signal Region +chr3 7178300 7223900 High Signal Region +chr3 7477600 7482500 High Signal Region +chr3 7910300 7916600 High Signal Region +chr3 8225200 8247500 High Signal Region +chr3 8574000 8589900 High Signal Region +chr3 8815300 8838700 High Signal Region +chr3 9091900 9096900 Low Mappability +chr3 9777500 9778500 High Signal Region +chr3 9904100 9910700 High Signal Region +chr3 9952100 9967100 High Signal Region +chr3 10453800 10464500 High Signal Region +chr3 10961700 10971700 High Signal Region +chr3 11050200 11070500 High Signal Region +chr3 11120700 11143300 High Signal Region +chr3 11518700 11524700 High Signal Region +chr3 11779200 11806000 High Signal Region +chr3 11933500 11938400 High Signal Region +chr3 11961500 11973100 High Signal Region +chr3 12107500 12131400 High Signal Region +chr3 12221200 12262000 High Signal Region +chr3 12336000 12339700 High Signal Region +chr3 12814500 12857800 Low Mappability +chr3 12906200 
12907300 High Signal Region +chr3 13219400 13222800 High Signal Region +chr3 13821100 13826600 Low Mappability +chr3 13965800 13972000 High Signal Region +chr3 14272100 14336300 High Signal Region +chr3 14449600 14478500 High Signal Region +chr3 14593200 14597400 High Signal Region +chr3 14668900 14744700 High Signal Region +chr3 15028800 15045100 High Signal Region +chr3 15079500 15087400 High Signal Region +chr3 15451600 15872400 High Signal Region +chr3 15964200 15967200 High Signal Region +chr3 16351400 16357100 High Signal Region +chr3 16626000 16633700 High Signal Region +chr3 16995700 17021400 High Signal Region +chr3 17419700 17447600 High Signal Region +chr3 17679600 17682100 High Signal Region +chr3 17954200 17997400 High Signal Region +chr3 18379800 18395100 High Signal Region +chr3 18432100 18437500 High Signal Region +chr3 18966900 18983600 High Signal Region +chr3 19357600 19359300 High Signal Region +chr3 19594900 19601100 High Signal Region +chr3 19917700 19940300 High Signal Region +chr3 21247500 21250200 High Signal Region +chr3 21317800 21324600 High Signal Region +chr3 21383700 21389000 High Signal Region +chr3 21512900 21519300 High Signal Region +chr3 21661800 21663700 Low Mappability +chr3 21685300 21709500 High Signal Region +chr3 22069200 22070500 High Signal Region +chr3 22240800 22250100 High Signal Region +chr3 22362000 22377000 High Signal Region +chr3 22517600 22521100 High Signal Region +chr3 22612100 22759200 High Signal Region +chr3 22933800 23015000 High Signal Region +chr3 23077300 23099800 High Signal Region +chr3 23173700 23180900 Low Mappability +chr3 23302200 23321100 High Signal Region +chr3 23353500 23360000 High Signal Region +chr3 23463300 23468200 High Signal Region +chr3 23579500 23584900 High Signal Region +chr3 23841700 23843800 Low Mappability +chr3 24624400 24627900 High Signal Region +chr3 24655200 24661300 High Signal Region +chr3 25210800 25228800 Low Mappability +chr3 25277500 25310400 High Signal Region +chr3 
25416900 25421600 Low Mappability +chr3 25472900 25478900 High Signal Region +chr3 26089400 26113400 High Signal Region +chr3 26346800 26369700 High Signal Region +chr3 26724600 26737000 High Signal Region +chr3 26944500 26950800 High Signal Region +chr3 27010100 27023300 High Signal Region +chr3 27309300 27319800 Low Mappability +chr3 28198300 28201300 Low Mappability +chr3 28513900 28535500 High Signal Region +chr3 28983500 29014200 High Signal Region +chr3 29461500 29492300 High Signal Region +chr3 29675900 29680600 High Signal Region +chr3 31176300 31188900 Low Mappability +chr3 31340700 31364500 Low Mappability +chr3 31651800 31680100 High Signal Region +chr3 31819800 31826900 High Signal Region +chr3 33696500 33708400 High Signal Region +chr3 33768300 33798500 High Signal Region +chr3 33930000 33948800 Low Mappability +chr3 34516200 34518200 High Signal Region +chr3 35285400 35292700 High Signal Region +chr3 35707000 35713500 Low Mappability +chr3 35743300 35744600 High Signal Region +chr3 36106500 36109400 High Signal Region +chr3 36285400 36291100 High Signal Region +chr3 36847300 36853900 High Signal Region +chr3 39026800 39030900 High Signal Region +chr3 39183300 39189800 High Signal Region +chr3 40151300 40157700 High Signal Region +chr3 40347600 40352600 High Signal Region +chr3 40549300 40651700 High Signal Region +chr3 41871900 41887800 High Signal Region +chr3 41993500 41999500 High Signal Region +chr3 42170000 42187300 High Signal Region +chr3 42682100 42722800 High Signal Region +chr3 42820200 42827400 High Signal Region +chr3 43108100 43197200 High Signal Region +chr3 43466400 43492100 High Signal Region +chr3 43538900 43557700 High Signal Region +chr3 44185900 44191600 High Signal Region +chr3 44241200 44260000 High Signal Region +chr3 44401500 44407500 High Signal Region +chr3 44559600 44565200 High Signal Region +chr3 44884400 44890700 High Signal Region +chr3 45579200 45591900 High Signal Region +chr3 45848500 45863400 Low Mappability +chr3 
45986000 45990700 High Signal Region +chr3 46141000 46148200 High Signal Region +chr3 46338200 46340300 Low Mappability +chr3 46735000 46741900 High Signal Region +chr3 46795400 46805400 High Signal Region +chr3 46910900 46936200 High Signal Region +chr3 47592800 47598000 High Signal Region +chr3 47798300 47799600 High Signal Region +chr3 47966600 47968700 High Signal Region +chr3 48437800 48462000 High Signal Region +chr3 49443600 49482800 High Signal Region +chr3 49727200 49734400 High Signal Region +chr3 50464900 50474400 High Signal Region +chr3 50763700 50814900 High Signal Region +chr3 50957300 50963000 High Signal Region +chr3 51233600 51245400 Low Mappability +chr3 51616000 51623700 Low Mappability +chr3 51765300 51784900 High Signal Region +chr3 52230000 52233400 High Signal Region +chr3 53426900 53431000 High Signal Region +chr3 54849100 54874300 Low Mappability +chr3 56069700 56075200 High Signal Region +chr3 56210900 56215900 High Signal Region +chr3 56513600 56576700 High Signal Region +chr3 56903800 56943000 High Signal Region +chr3 57059400 57070200 High Signal Region +chr3 57349800 57379400 High Signal Region +chr3 58051100 58081600 Low Mappability +chr3 59370700 59412200 High Signal Region +chr3 59565300 59632700 High Signal Region +chr3 59684600 59689200 High Signal Region +chr3 59791800 59804200 Low Mappability +chr3 59887400 59889300 High Signal Region +chr3 59919200 59921100 High Signal Region +chr3 60044300 60046800 High Signal Region +chr3 60489700 60495200 Low Mappability +chr3 61150800 61177900 High Signal Region +chr3 61260700 61275000 Low Mappability +chr3 61495400 61499700 High Signal Region +chr3 61672300 61678300 High Signal Region +chr3 61707600 61726600 Low Mappability +chr3 61853900 61858900 High Signal Region +chr3 62032400 62038600 High Signal Region +chr3 62108300 62160100 High Signal Region +chr3 62356900 62367700 High Signal Region +chr3 62543000 62549200 High Signal Region +chr3 62873000 62879300 High Signal Region +chr3 
63515500 63530100 High Signal Region +chr3 63590100 63591500 High Signal Region +chr3 64171000 64172900 High Signal Region +chr3 64237900 64245700 High Signal Region +chr3 64453100 64512800 High Signal Region +chr3 64609600 64665300 Low Mappability +chr3 64697900 64730500 High Signal Region +chr3 67027900 67054100 High Signal Region +chr3 67262400 67264000 High Signal Region +chr3 67411100 67419400 High Signal Region +chr3 67747300 67752800 High Signal Region +chr3 67786800 67793600 High Signal Region +chr3 68114300 68119700 Low Mappability +chr3 68519400 68525100 High Signal Region +chr3 69228600 69230500 High Signal Region +chr3 69848400 69854900 High Signal Region +chr3 69944400 69949800 High Signal Region +chr3 71117300 71122800 High Signal Region +chr3 71369600 71447800 High Signal Region +chr3 72273600 72293700 High Signal Region +chr3 72698100 72704800 High Signal Region +chr3 73088300 73098500 High Signal Region +chr3 73733100 73738500 Low Mappability +chr3 74583300 74598400 Low Mappability +chr3 74865000 74881800 High Signal Region +chr3 75348300 75378700 Low Mappability +chr3 75409000 75424100 High Signal Region +chr3 76598800 76604700 High Signal Region +chr3 76886600 76892900 Low Mappability +chr3 77597400 77604300 Low Mappability +chr3 77667400 77711400 High Signal Region +chr3 77926800 77931400 High Signal Region +chr3 78281900 78283900 Low Mappability +chr3 79012700 79014900 High Signal Region +chr3 79046300 79052800 Low Mappability +chr3 79763800 79780000 High Signal Region +chr3 79959500 79965700 High Signal Region +chr3 80465400 80472000 High Signal Region +chr3 82283300 82288700 Low Mappability +chr3 82462100 82508600 Low Mappability +chr3 82589000 82616700 Low Mappability +chr3 82921400 82924800 High Signal Region +chr3 83123200 83125100 High Signal Region +chr3 83330900 83343400 High Signal Region +chr3 83845100 83867000 High Signal Region +chr3 84142200 84149700 Low Mappability +chr3 84359000 84366300 Low Mappability +chr3 85305200 85326800 
Low Mappability +chr3 85622200 85629500 Low Mappability +chr3 87424200 87426100 High Signal Region +chr3 87469300 87474600 High Signal Region +chr3 88044000 88066500 High Signal Region +chr3 88666500 88673500 Low Mappability +chr3 88716700 88873000 Low Mappability +chr3 90761500 90810400 High Signal Region +chr3 90991100 90996800 Low Mappability +chr3 91856700 91898200 High Signal Region +chr3 92185400 92291300 High Signal Region +chr3 93059200 93107000 High Signal Region +chr3 93168500 93172800 High Signal Region +chr3 93203900 93229100 High Signal Region +chr3 93323700 93331700 Low Mappability +chr3 93860300 94093700 High Signal Region +chr3 94136200 94152300 High Signal Region +chr3 94658300 94665700 Low Mappability +chr3 94690000 94730800 High Signal Region +chr3 94757600 94765200 Low Mappability +chr3 96043600 96058900 High Signal Region +chr3 96196200 96288300 High Signal Region +chr3 96313200 96388900 Low Mappability +chr3 96446800 96463800 Low Mappability +chr3 96485600 96514300 High Signal Region +chr3 96840000 96863800 High Signal Region +chr3 97245200 97251500 High Signal Region +chr3 98396100 98411400 High Signal Region +chr3 98443100 98597600 Low Mappability +chr3 98709300 98778900 High Signal Region +chr3 98986000 99034100 High Signal Region +chr3 99406000 99434100 High Signal Region +chr3 99882900 99908100 High Signal Region +chr3 99980200 99982200 High Signal Region +chr3 100315500 100330900 High Signal Region +chr3 100484400 100486300 High Signal Region +chr3 102813400 102839300 High Signal Region +chr3 102983600 102989900 High Signal Region +chr3 103134600 103136000 High Signal Region +chr3 103427600 103447900 High Signal Region +chr3 103555000 103557000 Low Mappability +chr3 104116800 104123100 High Signal Region +chr3 104194200 104198800 High Signal Region +chr3 104588100 104595500 Low Mappability +chr3 105028200 105030500 High Signal Region +chr3 106118500 106311800 High Signal Region +chr3 106777900 106779700 High Signal Region +chr3 109258500 
109277300 High Signal Region +chr3 109458000 109462700 High Signal Region +chr3 110319800 110325700 High Signal Region +chr3 110416300 110421800 High Signal Region +chr3 111256100 111268600 High Signal Region +chr3 111578400 111605200 Low Mappability +chr3 111794100 111799000 Low Mappability +chr3 111830400 111836300 High Signal Region +chr3 112274500 112287300 High Signal Region +chr3 112315500 112337400 High Signal Region +chr3 112561900 112586900 High Signal Region +chr3 112863500 112869300 High Signal Region +chr3 112913800 112918000 High Signal Region +chr3 113186300 113189100 High Signal Region +chr3 113250900 113527800 High Signal Region +chr3 113709900 113719000 High Signal Region +chr3 113742300 113748300 High Signal Region +chr3 114272600 114279400 High Signal Region +chr3 114472100 114499300 Low Mappability +chr3 114587900 114595900 High Signal Region +chr3 114976700 114982800 High Signal Region +chr3 115020700 115027100 Low Mappability +chr3 115367700 115372200 Low Mappability +chr3 115905900 115922900 High Signal Region +chr3 116817400 116843900 Low Mappability +chr3 117267200 117292400 High Signal Region +chr3 117379100 117386400 Low Mappability +chr3 118055100 118060000 High Signal Region +chr3 119211800 119212900 High Signal Region +chr3 120735000 120742200 High Signal Region +chr3 120825200 120851500 High Signal Region +chr3 121248900 121250900 High Signal Region +chr3 121694400 121696100 High Signal Region +chr3 122294000 122329300 High Signal Region +chr3 122654100 122657300 High Signal Region +chr3 122804300 122806600 High Signal Region +chr3 123471600 123476200 Low Mappability +chr3 123729200 123743200 High Signal Region +chr3 123924800 123957700 High Signal Region +chr3 124282300 124288300 High Signal Region +chr3 125902800 125908900 High Signal Region +chr3 126127300 126136000 Low Mappability +chr3 126905300 126910600 High Signal Region +chr3 127522400 127523700 Low Mappability +chr3 127771600 127780600 High Signal Region +chr3 128203600 
128211000 High Signal Region +chr3 128440100 128446100 High Signal Region +chr3 128935800 128937700 High Signal Region +chr3 129020900 129032100 High Signal Region +chr3 129393000 129394900 High Signal Region +chr3 133123600 133130800 Low Mappability +chr3 133566400 133568700 High Signal Region +chr3 133636000 133642800 High Signal Region +chr3 133837100 133859400 High Signal Region +chr3 134007400 134026700 Low Mappability +chr3 134685700 134690700 High Signal Region +chr3 134862500 134888400 High Signal Region +chr3 135148300 135163000 High Signal Region +chr3 136173700 136181000 Low Mappability +chr3 137407500 137413500 High Signal Region +chr3 137469200 137470300 High Signal Region +chr3 138200900 138207900 High Signal Region +chr3 139365700 139417700 High Signal Region +chr3 140376900 140384200 Low Mappability +chr3 142190700 142192800 High Signal Region +chr3 142513000 142517200 High Signal Region +chr3 143840800 143847000 High Signal Region +chr3 144030200 144036300 High Signal Region +chr3 144655600 144660600 High Signal Region +chr3 145040500 145061800 High Signal Region +chr3 145109000 145114400 Low Mappability +chr3 145188100 145190400 High Signal Region +chr3 145301600 145303100 High Signal Region +chr3 146073300 146102400 High Signal Region +chr3 146358800 146362600 High Signal Region +chr3 146476200 146479000 High Signal Region +chr3 146918900 146924200 High Signal Region +chr3 147107400 147113000 High Signal Region +chr3 147769500 147781800 High Signal Region +chr3 147874500 147877600 High Signal Region +chr3 148704800 148716900 High Signal Region +chr3 148750100 148757400 Low Mappability +chr3 148797800 148799700 High Signal Region +chr3 149051500 149053800 High Signal Region +chr3 150120900 150123800 High Signal Region +chr3 150336900 150341400 Low Mappability +chr3 151028900 151031200 High Signal Region +chr3 151657500 151679800 High Signal Region +chr3 152313800 152332200 High Signal Region +chr3 152700700 152702700 High Signal Region +chr3 
153090100 153109400 High Signal Region +chr3 154640300 154646700 High Signal Region +chr3 154931700 154932800 High Signal Region +chr3 155515800 155517600 High Signal Region +chr3 155765900 155771900 High Signal Region +chr3 156256900 156262800 Low Mappability +chr3 156285600 156322500 High Signal Region +chr3 156799400 156804900 Low Mappability +chr3 157646900 157678300 High Signal Region +chr3 157946200 157969400 High Signal Region +chr3 158095300 158119200 High Signal Region +chr3 158698600 158756800 High Signal Region +chr3 159165900 159179700 High Signal Region +chr3 159225800 159239300 Low Mappability +chr3 159478300 159479700 High Signal Region +chr3 159748800 159826500 High Signal Region +chr3 159938500 160039600 High Signal Region +chr4 0 3114800 High Signal Region +chr4 3139700 3333100 High Signal Region +chr4 18476200 18498400 High Signal Region +chr4 20168700 20213200 High Signal Region +chr4 20804100 20808300 High Signal Region +chr4 20982300 20983700 High Signal Region +chr4 21281300 21287700 High Signal Region +chr4 22535900 22542300 High Signal Region +chr4 24193400 24201100 High Signal Region +chr4 25471300 25473200 High Signal Region +chr4 28175900 28177900 High Signal Region +chr4 31353200 31355200 High Signal Region +chr4 34934800 34936700 High Signal Region +chr4 35042700 35048900 High Signal Region +chr4 38305900 38322000 High Signal Region +chr4 57979700 57981800 High Signal Region +chr4 64454600 64499000 High Signal Region +chr4 68427300 68447900 High Signal Region +chr4 70367200 70379200 High Signal Region +chr4 73196300 73209300 High Signal Region +chr4 80001800 80004900 High Signal Region +chr4 83536900 83541900 High Signal Region +chr4 90725600 90727500 High Signal Region +chr4 92230800 92236500 High Signal Region +chr4 93843500 93853100 High Signal Region +chr4 99380500 99382400 High Signal Region +chr4 110469700 110505300 High Signal Region +chr4 118546100 118549600 High Signal Region +chr4 131222500 131229300 High Signal Region +chr4 
145404200 147840400 High Signal Region +chr4 149809200 149811700 High Signal Region +chr4 153152100 153154100 High Signal Region +chr4 156256000 156508100 High Signal Region +chr5 3175400 3186000 High Signal Region +chr5 12489500 12490600 High Signal Region +chr5 14899000 15726800 High Signal Region +chr5 17466700 17481500 High Signal Region +chr5 36629400 36662500 High Signal Region +chr5 46434800 46436700 High Signal Region +chr5 49722200 49755700 High Signal Region +chr5 60041900 60043900 Low Mappability +chr5 80499900 80501900 High Signal Region +chr5 93288700 93351800 High Signal Region +chr5 106126300 106177800 High Signal Region +chr5 110063700 110075500 High Signal Region +chr5 114921500 114923500 High Signal Region +chr5 137148800 137153800 High Signal Region +chr5 146260000 146262300 High Signal Region +chr5 151733600 151834600 High Signal Region +chr6 0 3255700 High Signal Region +chr6 3280700 3340300 High Signal Region +chr6 4922900 4925100 High Signal Region +chr6 5608000 5657900 High Signal Region +chr6 5704400 5706800 High Signal Region +chr6 6400000 6442800 High Signal Region +chr6 6700000 6727600 High Signal Region +chr6 8729200 8731100 High Signal Region +chr6 8906700 8932300 High Signal Region +chr6 9519200 9529100 High Signal Region +chr6 9580600 9610100 High Signal Region +chr6 9646900 9663400 High Signal Region +chr6 9720400 9733100 High Signal Region +chr6 9889000 9891100 High Signal Region +chr6 10228400 10269900 High Signal Region +chr6 10559100 10588400 High Signal Region +chr6 10623400 10633900 High Signal Region +chr6 11251100 11256800 High Signal Region +chr6 11406400 11457900 High Signal Region +chr6 11813900 11897100 High Signal Region +chr6 12671100 12680300 High Signal Region +chr6 13390500 13394900 High Signal Region +chr6 13700500 13743100 High Signal Region +chr6 14085000 14092300 Low Mappability +chr6 14793800 14805500 High Signal Region +chr6 14929200 14935100 High Signal Region +chr6 16299700 16310100 High Signal Region +chr6 
16922600 16924800 High Signal Region +chr6 17004600 17042000 High Signal Region +chr6 17391200 17397900 High Signal Region +chr6 17981700 17983400 High Signal Region +chr6 18264800 18267200 High Signal Region +chr6 18836700 18848600 High Signal Region +chr6 19068900 19075400 High Signal Region +chr6 20113900 20143500 High Signal Region +chr6 21452400 21458100 High Signal Region +chr6 21801300 21803200 High Signal Region +chr6 21841300 21845300 High Signal Region +chr6 21873300 21876800 High Signal Region +chr6 22107700 22131800 High Signal Region +chr6 22479600 22483900 High Signal Region +chr6 22516700 22534300 High Signal Region +chr6 25505600 25566400 Low Mappability +chr6 26049500 26072100 High Signal Region +chr6 26247700 26278000 High Signal Region +chr6 26834800 26840700 High Signal Region +chr6 26988500 26992000 High Signal Region +chr6 27199000 27228400 High Signal Region +chr6 28924100 28929500 Low Mappability +chr6 29746800 29750000 High Signal Region +chr6 29974300 29978200 High Signal Region +chr6 30752800 30806400 High Signal Region +chr6 30929300 30936100 Low Mappability +chr6 31594900 31597200 High Signal Region +chr6 32740700 32746800 High Signal Region +chr6 32867600 32869000 High Signal Region +chr6 33490300 33495000 High Signal Region +chr6 33650500 33665400 High Signal Region +chr6 33743900 33749000 High Signal Region +chr6 36224300 36230500 High Signal Region +chr6 40535500 40559800 Low Mappability +chr6 40716600 40723700 High Signal Region +chr6 42122800 42174200 High Signal Region +chr6 42492600 42516600 High Signal Region +chr6 42617600 42620900 High Signal Region +chr6 44265200 44270800 High Signal Region +chr6 44497000 44513300 High Signal Region +chr6 44785200 44794100 High Signal Region +chr6 44836300 44837500 High Signal Region +chr6 46381300 46402000 High Signal Region +chr6 46678600 46685300 High Signal Region +chr6 47639000 47779200 High Signal Region +chr6 48120300 48122300 High Signal Region +chr6 48149300 48172900 High Signal 
Region +chr6 48231500 48292600 High Signal Region +chr6 48320300 48347000 High Signal Region +chr6 49235500 49237500 High Signal Region +chr6 50601400 50636700 Low Mappability +chr6 51046500 51048400 High Signal Region +chr6 53464100 53487500 Low Mappability +chr6 54976500 54993700 High Signal Region +chr6 56232700 56257500 High Signal Region +chr6 56455900 56465300 High Signal Region +chr6 57425200 57455700 High Signal Region +chr6 57588900 57634500 High Signal Region +chr6 57919500 57925700 High Signal Region +chr6 58068500 58073500 High Signal Region +chr6 58588700 58612800 High Signal Region +chr6 59123600 59130100 High Signal Region +chr6 59199600 59230600 High Signal Region +chr6 59584300 59598000 High Signal Region +chr6 59676000 59698200 High Signal Region +chr6 60622400 60625600 High Signal Region +chr6 60668000 60688200 High Signal Region +chr6 61023100 61029400 High Signal Region +chr6 61088400 61094600 High Signal Region +chr6 62525500 62527300 High Signal Region +chr6 64331600 64338900 Low Mappability +chr6 64778500 64812500 High Signal Region +chr6 64882100 64930500 High Signal Region +chr6 65100600 65106700 High Signal Region +chr6 65184300 65261600 High Signal Region +chr6 66070200 66095900 High Signal Region +chr6 66815600 66831600 High Signal Region +chr6 67311500 67312900 High Signal Region +chr6 67494800 67522100 Low Mappability +chr6 67576400 67630800 High Signal Region +chr6 67658300 67710900 High Signal Region +chr6 68011000 68012900 High Signal Region +chr6 68221900 68252400 Low Mappability +chr6 68641400 68661300 High Signal Region +chr6 68971900 68996400 High Signal Region +chr6 69017600 69035700 High Signal Region +chr6 70000300 70053000 High Signal Region +chr6 70187800 70213700 High Signal Region +chr6 70620700 70648600 High Signal Region +chr6 73105700 73113400 High Signal Region +chr6 73502200 73521000 High Signal Region +chr6 73671400 73672600 High Signal Region +chr6 74191700 74194400 High Signal Region +chr6 74365900 74386400 High 
Signal Region +chr6 74700100 74705300 High Signal Region +chr6 75054000 75083000 High Signal Region +chr6 76645400 76649100 High Signal Region +chr6 76847200 76854100 High Signal Region +chr6 78352900 78359500 High Signal Region +chr6 78456200 78491700 Low Mappability +chr6 78637400 78639700 High Signal Region +chr6 78716700 78722400 High Signal Region +chr6 79627500 79635200 High Signal Region +chr6 79817300 79819200 High Signal Region +chr6 79898900 79922800 Low Mappability +chr6 79959800 79967500 Low Mappability +chr6 81012200 81036700 High Signal Region +chr6 81829400 81875000 High Signal Region +chr6 81997000 82011600 High Signal Region +chr6 82213400 82218800 High Signal Region +chr6 84662700 84688200 High Signal Region +chr6 84712600 84720200 High Signal Region +chr6 89723500 89735600 High Signal Region +chr6 91768300 91770200 High Signal Region +chr6 92321600 92328300 High Signal Region +chr6 94988600 94990700 Low Mappability +chr6 95030100 95043800 Low Mappability +chr6 95475600 95479900 High Signal Region +chr6 95980800 95987100 High Signal Region +chr6 96877800 96896100 High Signal Region +chr6 97356800 97379400 High Signal Region +chr6 101571200 101621400 High Signal Region +chr6 102379600 102384100 High Signal Region +chr6 102483000 102505700 High Signal Region +chr6 102767600 102791400 High Signal Region +chr6 103313700 103315600 High Signal Region +chr6 103647900 103650200 High Signal Region +chr6 103750700 103752000 High Signal Region +chr6 105194700 105199600 High Signal Region +chr6 105253400 105257600 Low Mappability +chr6 105306000 105337600 High Signal Region +chr6 107141500 107146300 High Signal Region +chr6 107284300 107299800 High Signal Region +chr6 107860500 107920500 High Signal Region +chr6 109498200 109506200 High Signal Region +chr6 109641800 109648100 High Signal Region +chr6 109984000 110013000 High Signal Region +chr6 114340600 114343000 High Signal Region +chr6 114492200 114643400 High Signal Region +chr6 116021200 116043900 High 
Signal Region +chr6 116238700 116252600 High Signal Region +chr6 116566200 116593800 High Signal Region +chr6 117087400 117094300 High Signal Region +chr6 118209000 118234000 High Signal Region +chr6 119419600 119431100 High Signal Region +chr6 121690100 121703800 High Signal Region +chr6 122614200 122616600 High Signal Region +chr6 123132100 123179400 High Signal Region +chr6 123204800 123242900 High Signal Region +chr6 126135200 126137300 Low Mappability +chr6 128680200 128693700 High Signal Region +chr6 128861200 128865300 High Signal Region +chr6 129857800 129863300 High Signal Region +chr6 129935700 129948400 High Signal Region +chr6 131088300 131114900 High Signal Region +chr6 131208300 131252100 High Signal Region +chr6 131495900 131505900 High Signal Region +chr6 132497200 132523000 Low Mappability +chr6 132597000 132598700 High Signal Region +chr6 132635400 132642000 High Signal Region +chr6 133169000 133170900 High Signal Region +chr6 133891500 133899800 High Signal Region +chr6 134689500 134692700 High Signal Region +chr6 138216100 138221900 High Signal Region +chr6 138647300 138649100 High Signal Region +chr6 138685400 138700700 High Signal Region +chr6 142060700 142079300 High Signal Region +chr6 142396700 142400200 Low Mappability +chr6 142433400 142439400 High Signal Region +chr6 143014400 143016300 High Signal Region +chr6 143466500 143481400 High Signal Region +chr6 143883500 143886900 High Signal Region +chr6 144655200 144670000 High Signal Region +chr6 145784700 145787000 High Signal Region +chr6 145931800 145933900 Low Mappability +chr6 146018900 146080500 High Signal Region +chr6 147077200 147079900 High Signal Region +chr6 147459800 147465000 Low Mappability +chr6 147549600 147555000 Low Mappability +chr6 147881900 147908400 High Signal Region +chr6 148013100 148038400 High Signal Region +chr6 148121800 148124500 High Signal Region +chr6 148635700 148640300 Low Mappability +chr6 148662900 148665000 Low Mappability +chr6 149585500 149736500 
High Signal Region +chr7 4558200 4594300 High Signal Region +chr7 4648600 4651500 High Signal Region +chr7 5153200 5244900 High Signal Region +chr7 5588700 5591600 High Signal Region +chr7 6050500 6056000 High Signal Region +chr7 6249400 6251400 High Signal Region +chr7 6590800 6597400 High Signal Region +chr7 7209500 7231000 High Signal Region +chr7 7273500 7327400 High Signal Region +chr7 7527500 7533900 High Signal Region +chr7 7556800 8278400 High Signal Region +chr7 8490800 9968800 High Signal Region +chr7 9992100 9998900 High Signal Region +chr7 10314900 10320900 High Signal Region +chr7 11097700 11123700 High Signal Region +chr7 11271100 11438600 High Signal Region +chr7 12009500 12084600 High Signal Region +chr7 12379600 12385400 High Signal Region +chr7 12526600 12548100 High Signal Region +chr7 13112300 13118100 High Signal Region +chr7 13591200 13620200 High Signal Region +chr7 14051300 14055900 High Signal Region +chr7 14767700 14823800 High Signal Region +chr7 14930100 15023000 High Signal Region +chr7 15128800 15623000 High Signal Region +chr7 16661400 16667800 High Signal Region +chr7 17112200 17123900 High Signal Region +chr7 17215800 17323400 High Signal Region +chr7 17800000 17806700 High Signal Region +chr7 17829700 17862600 High Signal Region +chr7 18487100 18493200 High Signal Region +chr7 19032600 19034500 High Signal Region +chr7 20799700 21103900 High Signal Region +chr7 21135700 23286800 High Signal Region +chr7 23494700 23503600 High Signal Region +chr7 24026200 24031700 High Signal Region +chr7 24103800 24108200 High Signal Region +chr7 24729400 24731300 High Signal Region +chr7 26022700 26066900 High Signal Region +chr7 26779000 26780900 High Signal Region +chr7 27082300 27098300 High Signal Region +chr7 27712800 27732500 High Signal Region +chr7 31365500 31387000 High Signal Region +chr7 31818200 31876700 High Signal Region +chr7 31934500 32043100 High Signal Region +chr7 32215700 32235200 High Signal Region +chr7 32629300 33098700 High 
Signal Region +chr7 33124200 33198000 High Signal Region +chr7 33949500 34004800 High Signal Region +chr7 34957200 34959100 High Signal Region +chr7 38396600 38787200 High Signal Region +chr7 38839800 39181000 High Signal Region +chr7 39227600 39404100 High Signal Region +chr7 39874600 39875900 High Signal Region +chr7 41791900 41851900 High Signal Region +chr7 43123800 43220300 High Signal Region +chr7 44737800 44739900 High Signal Region +chr7 47175100 47188600 High Signal Region +chr7 47414400 47519700 High Signal Region +chr7 48102600 48135800 High Signal Region +chr7 50940400 50986800 High Signal Region +chr7 51329800 51335900 High Signal Region +chr7 51800300 51812600 High Signal Region +chr7 51909200 51911200 High Signal Region +chr7 52095700 52104400 High Signal Region +chr7 52283300 52288900 High Signal Region +chr7 53677100 53683100 High Signal Region +chr7 53977800 54027400 High Signal Region +chr7 54336000 54351800 High Signal Region +chr7 54808900 54810100 High Signal Region +chr7 54923000 54971200 High Signal Region +chr7 55011500 55016500 High Signal Region +chr7 55080000 55086300 High Signal Region +chr7 55115400 55141000 High Signal Region +chr7 55657400 55667100 High Signal Region +chr7 56062300 56081700 High Signal Region +chr7 56160100 56163400 Low Mappability +chr7 56660300 56693600 High Signal Region +chr7 57367200 57374700 High Signal Region +chr7 58040300 58077100 High Signal Region +chr7 58161700 58177900 High Signal Region +chr7 59673100 59910900 High Signal Region +chr7 60209400 60215600 High Signal Region +chr7 60676300 60682800 High Signal Region +chr7 61320100 61395400 High Signal Region +chr7 62135200 62137500 High Signal Region +chr7 62651400 62693400 High Signal Region +chr7 63272500 63287100 High Signal Region +chr7 63431300 63432400 High Signal Region +chr7 63803700 63810800 High Signal Region +chr7 63908200 63910100 High Signal Region +chr7 64072600 64134600 High Signal Region +chr7 64465300 64496400 High Signal Region +chr7 
64601000 64617900 High Signal Region +chr7 65187500 65198300 High Signal Region +chr7 68534700 68537900 High Signal Region +chr7 68775900 68778100 High Signal Region +chr7 69086500 69102900 High Signal Region +chr7 69785300 69792200 High Signal Region +chr7 70757900 70765000 High Signal Region +chr7 71971100 71984500 High Signal Region +chr7 72317400 72337900 High Signal Region +chr7 72630000 72679900 High Signal Region +chr7 73212000 73218800 High Signal Region +chr7 73671700 73680000 High Signal Region +chr7 75003200 75007700 High Signal Region +chr7 76067800 76079300 High Signal Region +chr7 76556000 76573000 High Signal Region +chr7 76703900 76708400 High Signal Region +chr7 77520600 77526000 High Signal Region +chr7 78416900 78422400 High Signal Region +chr7 80708100 80730100 Low Mappability +chr7 80787500 80813800 High Signal Region +chr7 81756100 81760500 High Signal Region +chr7 82770300 82772800 High Signal Region +chr7 85017700 85023600 High Signal Region +chr7 85757200 85768800 High Signal Region +chr7 86118700 86125800 High Signal Region +chr7 86497400 86503500 High Signal Region +chr7 86532600 86534000 High Signal Region +chr7 86805600 86807500 High Signal Region +chr7 87989300 88000600 High Signal Region +chr7 89683300 89704600 High Signal Region +chr7 90087300 90089400 High Signal Region +chr7 90441000 90442900 High Signal Region +chr7 91741500 91747500 High Signal Region +chr7 93259400 93278100 High Signal Region +chr7 93699600 93717500 High Signal Region +chr7 93744000 93766100 High Signal Region +chr7 93969600 93973700 High Signal Region +chr7 94293000 94299300 High Signal Region +chr7 94822500 94848800 High Signal Region +chr7 95177200 95193600 High Signal Region +chr7 95527400 95533200 High Signal Region +chr7 97795000 97797300 High Signal Region +chr7 103100800 103115000 High Signal Region +chr7 103195500 103202100 High Signal Region +chr7 103483000 103487500 High Signal Region +chr7 104097400 104126600 High Signal Region +chr7 104476800 
104477900 High Signal Region +chr7 104770000 104801200 High Signal Region +chr7 105830300 106325300 High Signal Region +chr7 106979000 106984900 High Signal Region +chr7 107245200 107271400 High Signal Region +chr7 108780600 108789800 High Signal Region +chr7 110058500 110061600 High Signal Region +chr7 111228400 111230600 High Signal Region +chr7 112636600 112639800 High Signal Region +chr7 116432200 116453400 High Signal Region +chr7 119739900 119742100 High Signal Region +chr7 119795700 119797700 High Signal Region +chr7 119998800 120015100 High Signal Region +chr7 124522300 124528300 High Signal Region +chr7 125009800 125016600 High Signal Region +chr7 128171000 128189300 High Signal Region +chr7 130054200 130055700 High Signal Region +chr7 130591400 130596900 High Signal Region +chr7 130833500 130835600 High Signal Region +chr7 134100500 134107200 High Signal Region +chr7 134329200 134335200 High Signal Region +chr7 135006900 135008800 High Signal Region +chr7 135337800 135340900 High Signal Region +chr7 138590500 138594500 High Signal Region +chr7 139447400 139448900 High Signal Region +chr7 140288200 140307300 High Signal Region +chr7 140551100 140558800 High Signal Region +chr7 140580500 140585700 High Signal Region +chr7 141637000 141640700 High Signal Region +chr7 142828900 142845000 High Signal Region +chr7 145340000 145441400 High Signal Region +chr8 3753500 3779100 High Signal Region +chr8 14305800 14308200 High Signal Region +chr8 15508900 15521000 High Signal Region +chr8 19671800 19937800 High Signal Region +chr8 19960800 20868000 High Signal Region +chr8 20945500 20963700 High Signal Region +chr8 23085600 23096700 High Signal Region +chr8 35134000 35135900 High Signal Region +chr8 39132400 39157700 High Signal Region +chr8 55111200 55397300 High Signal Region +chr8 69416700 69597900 High Signal Region +chr8 71432100 71434100 High Signal Region +chr8 71796100 71863300 High Signal Region +chr8 73318700 73320700 High Signal Region +chr8 83755800 
83757900 High Signal Region +chr8 114436000 114437900 High Signal Region +chr8 123537300 123638300 High Signal Region +chr8 125778100 125780100 High Signal Region +chr8 129272900 129401200 High Signal Region +chr9 0 3053100 High Signal Region +chr9 3240200 3259800 High Signal Region +chr9 3302000 3336000 High Signal Region +chr9 3461000 3466600 Low Mappability +chr9 3627400 3699700 Low Mappability +chr9 3802100 3806700 High Signal Region +chr9 3881100 3887600 High Signal Region +chr9 4238700 4245700 Low Mappability +chr9 4375700 4406800 High Signal Region +chr9 5248000 5254100 High Signal Region +chr9 5276200 5284600 Low Mappability +chr9 6431500 6467200 High Signal Region +chr9 6742900 6806200 Low Mappability +chr9 7294600 7300700 High Signal Region +chr9 7370900 7412600 Low Mappability +chr9 7520900 7525900 High Signal Region +chr9 8029400 8067100 Low Mappability +chr9 8275900 8292300 Low Mappability +chr9 8447200 8483700 High Signal Region +chr9 8628200 8633700 Low Mappability +chr9 8859900 8865500 High Signal Region +chr9 9598800 9626700 High Signal Region +chr9 9846900 9891900 Low Mappability +chr9 10193200 10198800 Low Mappability +chr9 10701300 10707400 High Signal Region +chr9 10964200 10970600 High Signal Region +chr9 11341900 11345100 High Signal Region +chr9 11722300 11747100 High Signal Region +chr9 11792800 11798400 Low Mappability +chr9 11821400 11845400 High Signal Region +chr9 12282000 12287500 High Signal Region +chr9 12364900 12379600 High Signal Region +chr9 12469100 12472900 Low Mappability +chr9 12768200 12773800 High Signal Region +chr9 12840100 12851100 High Signal Region +chr9 12917600 12922300 High Signal Region +chr9 12998400 13045600 Low Mappability +chr9 13324200 13426100 High Signal Region +chr9 13533500 13535700 High Signal Region +chr9 13994600 13996700 High Signal Region +chr9 14410500 14429300 Low Mappability +chr9 15123900 15136900 High Signal Region +chr9 16607400 16691900 Low Mappability +chr9 16833700 16861000 High Signal Region 
+chr9 16939400 16950500 Low Mappability +chr9 17059000 17088000 High Signal Region +chr9 17197900 17207600 High Signal Region +chr9 17261400 17263400 Low Mappability +chr9 17387200 17406200 High Signal Region +chr9 17525800 17527700 High Signal Region +chr9 17632000 17636100 High Signal Region +chr9 17916200 17919600 High Signal Region +chr9 18010000 18015600 High Signal Region +chr9 18117000 18162200 Low Mappability +chr9 18235100 18270100 High Signal Region +chr9 18893800 18900100 High Signal Region +chr9 18980400 18994100 High Signal Region +chr9 19268700 19294700 High Signal Region +chr9 19595400 19638400 High Signal Region +chr9 19720500 19725500 Low Mappability +chr9 19901400 19906100 High Signal Region +chr9 20183600 20196700 Low Mappability +chr9 20322100 20407900 High Signal Region +chr9 21879200 21928200 High Signal Region +chr9 22116600 22191600 High Signal Region +chr9 22699500 22731700 High Signal Region +chr9 22892700 22926500 Low Mappability +chr9 22947900 22956900 High Signal Region +chr9 23508700 23526900 High Signal Region +chr9 24523300 24576000 High Signal Region +chr9 25596700 25602700 High Signal Region +chr9 25842900 25863600 High Signal Region +chr9 26096100 26103500 Low Mappability +chr9 26700800 26708000 High Signal Region +chr9 26904600 26911000 High Signal Region +chr9 27212200 27232300 High Signal Region +chr9 27974400 27981700 High Signal Region +chr9 29739800 29741800 Low Mappability +chr9 30604400 30606300 Low Mappability +chr9 30641800 30696800 Low Mappability +chr9 30929800 30931100 High Signal Region +chr9 32059200 32083600 Low Mappability +chr9 32353900 32356500 High Signal Region +chr9 32839200 32846600 Low Mappability +chr9 32888700 32896000 Low Mappability +chr9 32953000 32958100 Low Mappability +chr9 33127100 33161100 Low Mappability +chr9 33392400 33402700 High Signal Region +chr9 33949500 33961900 Low Mappability +chr9 35071200 35091800 High Signal Region +chr9 35304300 35306500 High Signal Region +chr9 36235800 36241900 
High Signal Region +chr9 36555000 36569100 High Signal Region +chr9 37331400 37349500 Low Mappability +chr9 37441700 37448100 High Signal Region +chr9 39330900 39359100 High Signal Region +chr9 39444100 39449600 High Signal Region +chr9 39835400 39899000 Low Mappability +chr9 44214200 44235400 Low Mappability +chr9 44305700 44408400 Low Mappability +chr9 47957400 47959300 High Signal Region +chr9 50082000 50088400 High Signal Region +chr9 51667400 51673700 High Signal Region +chr9 52601800 52617200 High Signal Region +chr9 52749000 52756100 High Signal Region +chr9 53089800 53107000 High Signal Region +chr9 53804100 53805400 High Signal Region +chr9 54916200 54928900 High Signal Region +chr9 55070600 55078000 Low Mappability +chr9 55150300 55152300 High Signal Region +chr9 55936900 55972500 High Signal Region +chr9 56222700 56224800 High Signal Region +chr9 56259500 56284300 High Signal Region +chr9 56991700 56993700 Low Mappability +chr9 57408000 57434800 High Signal Region +chr9 58766500 58785800 High Signal Region +chr9 59046200 59052700 Low Mappability +chr9 59103800 59125000 High Signal Region +chr9 60538500 60551200 High Signal Region +chr9 60726100 60733500 High Signal Region +chr9 61721500 61723400 High Signal Region +chr9 62811600 62868300 Low Mappability +chr9 64236700 64255000 Low Mappability +chr9 64410400 64417700 Low Mappability +chr9 65292600 65314200 High Signal Region +chr9 65867400 65909400 High Signal Region +chr9 67198600 67205000 Low Mappability +chr9 68451200 68461200 High Signal Region +chr9 68527100 68534600 High Signal Region +chr9 71080600 71120800 Low Mappability +chr9 71421100 71434600 High Signal Region +chr9 72895800 72900800 Low Mappability +chr9 72957900 72985700 Low Mappability +chr9 73285500 73311300 High Signal Region +chr9 73396800 73412500 Low Mappability +chr9 73861400 73863500 Low Mappability +chr9 73935600 73946700 High Signal Region +chr9 74615600 74641300 Low Mappability +chr9 74664800 74690900 High Signal Region +chr9 
74768600 74774600 High Signal Region +chr9 75709200 75736000 Low Mappability +chr9 77079900 77082800 High Signal Region +chr9 77152800 77158800 High Signal Region +chr9 77972400 77974300 High Signal Region +chr9 78175200 78182700 Low Mappability +chr9 78230500 78296900 High Signal Region +chr9 78554700 78589200 Low Mappability +chr9 78755200 78757800 High Signal Region +chr9 78819200 78830500 Low Mappability +chr9 80234500 80235700 High Signal Region +chr9 80660700 80665600 High Signal Region +chr9 81251500 81303200 High Signal Region +chr9 81614000 81620700 High Signal Region +chr9 81906400 81937200 High Signal Region +chr9 83278800 83288100 High Signal Region +chr9 83558300 83560200 High Signal Region +chr9 83935500 83950000 High Signal Region +chr9 83992400 83998900 High Signal Region +chr9 84211900 84226800 High Signal Region +chr9 85898900 85918900 High Signal Region +chr9 86062600 86070000 Low Mappability +chr9 86120100 86137500 High Signal Region +chr9 86458200 86463100 High Signal Region +chr9 87098700 87112200 High Signal Region +chr9 87481400 87500900 High Signal Region +chr9 87576700 87594000 High Signal Region +chr9 87945600 87952400 High Signal Region +chr9 88011000 88013900 High Signal Region +chr9 88592100 88829800 High Signal Region +chr9 89031300 89075400 Low Mappability +chr9 89321400 89361800 High Signal Region +chr9 90147100 90149100 High Signal Region +chr9 90285200 90395300 High Signal Region +chr9 90455400 90456800 High Signal Region +chr9 90808100 90821900 Low Mappability +chr9 90857200 90876300 Low Mappability +chr9 91222100 91268200 High Signal Region +chr9 91598800 91647400 High Signal Region +chr9 92032700 92035300 High Signal Region +chr9 92075300 92113200 High Signal Region +chr9 92239700 92242900 High Signal Region +chr9 92624800 92654500 High Signal Region +chr9 93013300 93035300 High Signal Region +chr9 93286500 93296500 High Signal Region +chr9 93360800 93442100 Low Mappability +chr9 93618000 93668500 Low Mappability +chr9 94821700 
94828100 Low Mappability +chr9 95245800 95299600 High Signal Region +chr9 95425000 95426900 High Signal Region +chr9 95829400 95831300 High Signal Region +chr9 96104900 96111400 Low Mappability +chr9 96852000 96854100 High Signal Region +chr9 98343300 98345700 Low Mappability +chr9 98451100 98458500 Low Mappability +chr9 98747700 98771800 Low Mappability +chr9 99266600 99273100 Low Mappability +chr9 99735800 99763300 High Signal Region +chr9 99922800 99937600 High Signal Region +chr9 100073800 100080700 High Signal Region +chr9 100516900 100519200 High Signal Region +chr9 100920400 100922300 High Signal Region +chr9 101085500 101110600 High Signal Region +chr9 101292500 101326600 Low Mappability +chr9 102277400 102283800 Low Mappability +chr9 102764700 102766800 Low Mappability +chr9 102812800 102815000 High Signal Region +chr9 102956300 102970000 Low Mappability +chr9 103296200 103305600 High Signal Region +chr9 103352800 103367100 Low Mappability +chr9 103988500 103990400 High Signal Region +chr9 104524500 104525700 High Signal Region +chr9 104848800 104850600 High Signal Region +chr9 105086200 105119300 High Signal Region +chr9 105818400 105820400 High Signal Region +chr9 107207900 107219900 High Signal Region +chr9 109036600 109083500 High Signal Region +chr9 109245000 109252200 High Signal Region +chr9 109272900 109374100 High Signal Region +chr9 110280300 110306700 High Signal Region +chr9 110443100 110455100 High Signal Region +chr9 110970300 110976000 High Signal Region +chr9 111661900 111668700 High Signal Region +chr9 112330100 112336900 High Signal Region +chr9 112956300 112990600 High Signal Region +chr9 113260500 113262400 High Signal Region +chr9 113535400 113541300 High Signal Region +chr9 114101400 114149500 Low Mappability +chr9 114172400 114322200 High Signal Region +chr9 114970100 114974700 Low Mappability +chr9 115077900 115085200 Low Mappability +chr9 115349900 115351800 High Signal Region +chr9 115496100 115498100 Low Mappability +chr9 
116981500 116988600 High Signal Region +chr9 118088300 118151400 High Signal Region +chr9 118674000 118675900 High Signal Region +chr9 119861200 119895000 Low Mappability +chr9 120265300 120288700 High Signal Region +chr9 120633900 120641200 Low Mappability +chr9 121024600 121042700 Low Mappability +chr9 121178300 121184500 High Signal Region +chr9 121220100 121247600 High Signal Region +chr9 121313700 121385800 Low Mappability +chr9 121406300 121418400 Low Mappability +chr9 122161300 122163200 High Signal Region +chr9 122277700 122334500 Low Mappability +chr9 122401500 122441900 Low Mappability +chr9 122660600 122667200 Low Mappability +chr9 122703400 122730400 Low Mappability +chr9 122903900 122906600 High Signal Region +chr9 123190700 123197500 Low Mappability +chr9 123460900 123463100 High Signal Region +chr9 123742600 123753500 Low Mappability +chr9 123851700 123929500 High Signal Region +chr9 123966100 124009300 High Signal Region +chr9 124161300 124282600 High Signal Region +chr9 124494100 124595100 High Signal Region +chrX 3286700 4493800 High Signal Region +chrX 4524500 5370300 High Signal Region +chrX 8346400 8348200 High Signal Region +chrX 8550300 8557800 High Signal Region +chrX 8818900 8824300 High Signal Region +chrX 9345800 9395300 High Signal Region +chrX 9500200 9595700 High Signal Region +chrX 14739100 14741000 High Signal Region +chrX 21466500 21472700 High Signal Region +chrX 21846900 21896100 High Signal Region +chrX 26459300 26505100 High Signal Region +chrX 26907100 29639200 High Signal Region +chrX 29660500 35508900 High Signal Region +chrX 37612500 37669100 High Signal Region +chrX 39073800 39075700 High Signal Region +chrX 41482500 41489500 High Signal Region +chrX 42676200 42688100 High Signal Region +chrX 44239900 44293300 High Signal Region +chrX 44732600 44738600 High Signal Region +chrX 48699000 48771100 High Signal Region +chrX 54269300 55286000 High Signal Region +chrX 55716700 55807400 High Signal Region +chrX 58475000 58478700 
High Signal Region +chrX 59773000 59796900 High Signal Region +chrX 61868200 61874000 High Signal Region +chrX 62065700 62084900 High Signal Region +chrX 63509200 63515900 High Signal Region +chrX 63634600 63640900 High Signal Region +chrX 64125800 64132200 High Signal Region +chrX 65962800 65999900 High Signal Region +chrX 66067900 66084000 High Signal Region +chrX 66143100 66145700 High Signal Region +chrX 66316400 66356900 High Signal Region +chrX 67662500 67708500 High Signal Region +chrX 70055300 70072000 High Signal Region +chrX 72800000 72818700 High Signal Region +chrX 75582400 75709000 High Signal Region +chrX 76589100 76607100 High Signal Region +chrX 79135300 79150400 High Signal Region +chrX 81153100 81154600 High Signal Region +chrX 82475800 82481000 High Signal Region +chrX 84290800 84296100 High Signal Region +chrX 87222400 87262500 High Signal Region +chrX 87838600 87845200 High Signal Region +chrX 88230200 88246900 High Signal Region +chrX 89182800 89232600 High Signal Region +chrX 89914800 89916600 High Signal Region +chrX 90308600 90336600 High Signal Region +chrX 92765200 92767900 High Signal Region +chrX 94795400 94980600 High Signal Region +chrX 95265900 95291700 High Signal Region +chrX 97728000 97734800 High Signal Region +chrX 98008600 98033000 High Signal Region +chrX 98585800 98612400 High Signal Region +chrX 101111300 101113600 High Signal Region +chrX 102560800 102585100 High Signal Region +chrX 103455000 103457100 High Signal Region +chrX 104959400 104966000 High Signal Region +chrX 105523800 105529900 High Signal Region +chrX 108202600 108222500 High Signal Region +chrX 108567500 108585200 High Signal Region +chrX 109871000 109876200 High Signal Region +chrX 110976700 110997000 High Signal Region +chrX 112369800 112402300 High Signal Region +chrX 114412500 114421300 High Signal Region +chrX 118100900 118102900 High Signal Region +chrX 118901200 118905100 Low Mappability +chrX 119137300 119142400 High Signal Region +chrX 119247400 
119264800 High Signal Region +chrX 119335000 119339300 High Signal Region +chrX 120351000 120355400 High Signal Region +chrX 121511200 121514500 High Signal Region +chrX 122901700 122908000 High Signal Region +chrX 123686000 124042000 High Signal Region +chrX 126695300 126778800 High Signal Region +chrX 127935800 127964600 High Signal Region +chrX 128512700 128514400 High Signal Region +chrX 128959800 128965900 High Signal Region +chrX 129055600 129072400 High Signal Region +chrX 129429300 129448000 High Signal Region +chrX 130696000 130702200 High Signal Region +chrX 131802300 131832800 High Signal Region +chrX 132024200 132026400 High Signal Region +chrX 132158700 132160800 High Signal Region +chrX 134149100 134151200 High Signal Region +chrX 135040100 135056700 High Signal Region +chrX 136459400 136503800 High Signal Region +chrX 136897900 136925800 High Signal Region +chrX 138302200 138324600 High Signal Region +chrX 143471300 143484000 High Signal Region +chrX 144699500 144723900 High Signal Region +chrX 145709800 145739800 High Signal Region +chrX 146582500 146588700 High Signal Region +chrX 146758100 146761900 High Signal Region +chrX 147619400 147620700 High Signal Region +chrX 153994800 154073200 High Signal Region +chrX 154242800 154244800 High Signal Region +chrX 158443900 158460500 High Signal Region +chrX 159120000 159154900 High Signal Region +chrX 161179200 161185600 High Signal Region +chrX 162381600 162384600 High Signal Region +chrX 164615100 164622200 High Signal Region +chrX 166063200 166084500 High Signal Region +chrX 167213400 167220200 High Signal Region +chrX 167246000 167252200 High Signal Region +chrX 169968900 171031200 High Signal Region +chrY 0 806800 High Signal Region +chrY 924800 1005300 High Signal Region +chrY 1276400 1813700 High Signal Region +chrY 1834500 1940700 High Signal Region +chrY 1973200 1996400 High Signal Region +chrY 2017200 2068000 Low Mappability +chrY 2104700 2210800 High Signal Region +chrY 2280300 2288900 Low 
Mappability +chrY 2471300 3819300 High Signal Region +chrY 3880300 4177100 High Signal Region +chrY 4249500 4289100 High Signal Region +chrY 4432000 4956300 High Signal Region +chrY 5062400 5227700 High Signal Region +chrY 6376700 6382700 High Signal Region +chrY 6530200 6663200 High Signal Region +chrY 6760200 6835800 High Signal Region +chrY 6984100 8985400 High Signal Region +chrY 10638500 41003800 High Signal Region +chrY 41159200 91744600 High Signal Region diff --git a/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed b/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed new file mode 100644 index 000000000..a29072530 --- /dev/null +++ b/assets/blacklists/v3.0/GRCh38-blacklist.v3.bed @@ -0,0 +1,910 @@ +1 628903 635104 +1 5850087 5850571 +1 8909610 8910014 +1 9574580 9574997 +1 32043823 32044203 +1 33818964 33819344 +1 38674335 38674715 +1 50017081 50017546 +1 52996949 52997329 +1 55372488 55372869 +1 67971776 67972156 +1 73258720 73259100 +1 76971068 76971595 +1 93936365 93936747 +1 93937447 93937827 +1 102160407 102160787 +1 103620975 103621378 +1 106803432 106803816 +1 106804021 106804224 +1 106804753 106805343 +1 121609948 125063427 +1 125166231 125184683 +1 143184599 143276861 +1 146992422 146992802 +1 158449073 158449453 +1 158872114 158872494 +1 159295111 159295493 +1 169473895 169474338 +1 170006204 170006584 +1 172710350 172710732 +1 181422611 181423158 +1 191961694 191962163 +1 195288048 195288429 +1 199487949 199488149 +1 214709795 214710175 +1 215499615 215500014 +1 226652017 226652398 +1 227699752 227700133 +1 229019365 229019745 +1 233139985 233140365 +1 235520204 235520404 +1 235537405 235537785 +1 235538899 235540112 +1 235540243 235540623 +1 235540886 235541649 +1 235870625 235871005 +1 237940595 237940979 +1 237941045 237941514 +1 237941893 237942746 +1 237943028 237943416 +1 237943490 237945232 +1 237945285 237946507 +1 237948983 237949365 +1 237951294 237951802 +10 2235555 2235756 +10 19746628 19747247 +10 19747314 19748342 +10 25638376 25638756 
+10 26873147 26873538 +10 30565118 30565501 +10 36432964 36433344 +10 36434047 36435188 +10 37600616 37601002 +10 37601246 37601787 +10 37601884 37602850 +10 38481300 38596500 +10 38782600 38967900 +10 39000365 41916630 +10 42066792 42104971 +10 45577925 45578305 +10 46706229 46706611 +10 47633790 47634172 +10 55597861 55600059 +10 55626794 55627174 +10 57668682 57669062 +10 59261793 59262173 +10 69590538 69590738 +10 69591475 69591858 +10 69592355 69592740 +10 69592776 69593482 +10 69594378 69594760 +10 69595141 69595573 +10 69595681 69596061 +10 77166388 77166768 +10 79411056 79411468 +10 89786504 89786889 +10 100057235 100058064 +10 112894488 112894870 +10 115056512 115056712 +10 123032371 123032751 +10 125819621 125820001 +10 133689373 133689523 +11 8023287 8023667 +11 10507706 10510499 +11 10768339 10768719 +11 10815184 10815384 +11 24839563 24839944 +11 27850562 27850942 +11 47323881 47324333 +11 50424039 50813393 +11 51081363 54424064 +11 64187168 64187556 +11 65069483 65069863 +11 73510500 73510992 +11 81551734 81551934 +11 81553835 81554282 +11 81556152 81556537 +11 81556717 81557101 +11 87813427 87814320 +11 87815683 87816063 +11 103270627 103271007 +11 103403270 103403650 +11 103404014 103404527 +11 103404779 103405289 +11 103405809 103406376 +11 103406653 103407036 +11 103407110 103407310 +11 103408089 103409893 +11 103410074 103411211 +11 110876919 110877308 +11 114021166 114021546 +11 123003425 123003857 +11 123139919 123140301 +12 9923 10481 +12 2539174 2539982 +12 3887955 3888335 +12 19795477 19795864 +12 20769413 20769432 +12 21052950 21053330 +12 22005655 22006093 +12 27925108 27925488 +12 31247541 31247923 +12 31247963 31248343 +12 34665177 37429869 +12 40286245 40286625 +12 41363462 41363903 +12 41698591 41698971 +12 41699048 41699573 +12 49817252 49817634 +12 62773865 62774257 +12 80623983 80624183 +12 101486970 101487350 +12 123053921 123054301 +12 126583199 126583772 +12 126584137 126584530 +12 130315425 130315904 +13 16226300 18171400 +13 
25984718 25984918 +13 31866923 31867303 +13 33516898 33517278 +13 36065385 36065836 +13 40768206 40768595 +13 53891451 53891831 +13 55971453 55971922 +13 56688341 56688749 +13 72344211 72344591 +13 75592084 75592468 +13 83688313 83688693 +13 84521524 84522274 +13 84522848 84523233 +13 88308157 88308357 +13 95692549 95692935 +13 95693013 95693215 +13 95694449 95695698 +13 95696145 95696512 +13 105488067 105488448 +13 107058662 107059042 +13 109423944 109424560 +14 16000600 18173660 +14 23426306 23426691 +14 32483953 32485298 +14 37490106 37490486 +14 40643840 40644220 +14 43116742 43117122 +14 45238635 45239016 +14 45430378 45430758 +14 46048457 46048837 +14 46847040 46847420 +14 51587295 51587847 +14 83587331 83587894 +14 83588229 83589060 +14 84171262 84171729 +14 84171838 84172846 +14 84173508 84173969 +14 84174279 84174691 +14 86498937 86499317 +15 17058500 19838644 +15 30477565 30477945 +15 32529779 32530159 +15 34715310 34715692 +15 35396110 35396495 +15 40133887 40134759 +15 41157028 41157408 +15 52100391 52100771 +15 54583731 54584111 +15 58152409 58153114 +15 58153292 58153690 +15 58155859 58156155 +15 67040730 67041122 +15 91960163 91960543 +16 3367430 3368546 +16 3369658 3370039 +16 3370150 3370542 +16 3370932 3371445 +16 3371688 3372222 +16 10719290 10720105 +16 10720417 10720781 +16 10721235 10721874 +16 10721900 10722280 +16 10723423 10723623 +16 10723815 10724200 +16 10724415 10724654 +16 20720929 20721312 +16 20721365 20721746 +16 20722103 20722552 +16 34071571 34071629 +16 34131996 34289269 +16 34571482 34597852 +16 34661168 34661267 +16 34919141 34919184 +16 35966577 38269112 +16 38275767 38280684 +16 46380676 46381095 +16 46386376 46386491 +16 46388622 46389053 +16 46390180 46390788 +16 46394471 46395088 +16 46398828 46401647 +16 60470624 60471006 +16 65701465 65701846 +16 67590312 67590692 +16 69358523 69358990 +16 73161120 73161500 +16 82119745 82120125 +17 141682 142062 +17 14171308 14171688 +17 15568187 15568567 +17 19597515 19597985 +17 
19598613 19599532 +17 19599799 19600210 +17 19600300 19602064 +17 19602160 19602545 +17 19602886 19603595 +17 19603847 19604047 +17 19604922 19605588 +17 20851029 20851409 +17 21851150 21992060 +17 22519042 22520149 +17 22520322 22521025 +17 22521116 22526407 +17 22526636 22530152 +17 22530381 22532156 +17 22532315 22532940 +17 22551066 22551446 +17 22813591 26716670 +17 26885752 26885795 +17 35654769 35655182 +17 43251640 43251763 +17 43309853 43310048 +17 43315021 43316491 +17 43997535 43997957 +17 53105552 53106565 +17 54902920 54903301 +17 59279406 59279787 +17 63076394 63076777 +17 63393238 63393438 +17 65555244 65555624 +17 72316258 72316638 +17 80617407 80617802 +18 2842087 2842534 +18 8103913 8104113 +18 8846332 8846713 +18 15457976 20865732 +18 34571460 34571840 +18 47853089 47853617 +18 52883627 52884007 +18 59288306 59288686 +18 61874562 61874960 +18 77455900 77456280 +19 246899 247452 +19 12105016 12105399 +19 13362989 13363369 +19 24182199 27257542 +19 27741787 27741868 +19 36271917 36272148 +19 37572465 37572846 +19 37576134 37576516 +19 46122944 46123324 +19 47941356 47941426 +19 54794749 54795129 +19 56691535 56691736 +19 56922158 56922601 +2 638427 638808 +2 1087103 1087484 +2 16271753 16272134 +2 22316878 22317258 +2 24644617 24644997 +2 32916201 32916632 +2 33767290 33767703 +2 33964664 33965045 +2 36276769 36277149 +2 40784787 40785278 +2 49229452 49230058 +2 50588765 50589566 +2 54451654 54452034 +2 57648677 57649057 +2 67953669 67954049 +2 75063567 75063994 +2 81666317 81666849 +2 82814941 82815321 +2 82815451 82816236 +2 82816261 82816647 +2 82818378 82818748 +2 82820800 82821005 +2 85068666 85069046 +2 87824709 87825530 +2 89272789 89273133 +2 89827607 89827706 +2 89828636 89828710 +2 89828842 89828942 +2 89833685 89833793 +2 89839592 89839709 +2 89909317 89909789 +2 90379778 90402456 +2 92081223 92081398 +2 92188125 94293463 +2 94499181 94570956 +2 94898976 94899645 +2 94900639 94900840 +2 94901421 94901808 +2 97189431 97189813 +2 102482582 
102482962 +2 102505606 102505987 +2 110072034 110072434 +2 110299106 110299346 +2 116751234 116751614 +2 116752004 116752448 +2 116752517 116752897 +2 117020171 117020552 +2 117021107 117022152 +2 117022438 117024038 +2 117024277 117025093 +2 117025205 117025670 +2 117026130 117026512 +2 120211535 120212064 +2 120212685 120213069 +2 120213761 120214143 +2 120214590 120215370 +2 121220135 121220515 +2 124680743 124681182 +2 125812046 125812548 +2 129090774 129091154 +2 130272174 130272615 +2 130273451 130273981 +2 130274326 130274992 +2 130275174 130275744 +2 130276119 130276500 +2 130277774 130278727 +2 130279995 130280729 +2 130280827 130281440 +2 130557359 130557607 +2 130563142 130563396 +2 131369643 131369925 +2 131370949 131371562 +2 131371916 131372361 +2 131372758 131373137 +2 131379317 131380344 +2 131381592 131381973 +2 131382344 131382728 +2 131382772 131382974 +2 131383079 131384016 +2 131384051 131384621 +2 131384898 131385281 +2 131385356 131385794 +2 140217229 140218044 +2 140220209 140220840 +2 140220940 140221140 +2 140221198 140222369 +2 140222545 140223623 +2 140223647 140224297 +2 143088644 143089042 +2 143089938 143090358 +2 143090898 143091662 +2 143092255 143092646 +2 143093556 143093941 +2 143094515 143094999 +2 143095614 143095994 +2 143096048 143096428 +2 143096470 143097336 +2 143097466 143097981 +2 143100621 143101005 +2 147048574 147048955 +2 147244849 147245229 +2 147265034 147265432 +2 148822913 148823295 +2 148881545 148882032 +2 155196092 155196473 +2 155263345 155264313 +2 155264362 155264562 +2 155264599 155264982 +2 155311420 155311995 +2 155313539 155313922 +2 156828628 156829008 +2 162517271 162517651 +2 164117001 164117382 +2 166414323 166414779 +2 167378863 167379244 +2 168652433 168652813 +2 179739184 179739689 +2 190593881 190594262 +2 196204680 196205060 +2 201212170 201212612 +2 201212648 201212854 +2 201212903 201213386 +2 201214659 201215040 +2 201549404 201549784 +2 201550130 201550513 +2 201557568 201557948 +2 
202614117 202614527 +2 202615371 202615757 +2 202617016 202617398 +2 202618435 202618819 +2 202619754 202620134 +2 211773627 211774158 +2 211774322 211775192 +2 211775641 211776712 +2 211777034 211777417 +2 211777802 211778269 +2 211778916 211779562 +2 215573163 215573544 +2 226722088 226722596 +2 237521663 237522775 +2 237522862 237523652 +20 5999469 5999849 +20 9168743 9169145 +20 10441916 10442296 +20 13167142 13167534 +20 18449173 18449556 +20 22078162 22078542 +20 24024376 24024757 +20 26438448 28554562 +20 28644084 29015573 +20 29125977 29294639 +20 30744370 30744939 +20 30746748 30747241 +20 31051540 31106909 +20 31157044 31159116 +20 31161652 31223331 +20 34688743 34689039 +20 47894699 47896109 +20 57063873 57064279 +20 57357555 57358134 +20 57358221 57359428 +20 57359451 57360972 +20 63644937 63645318 +21 6369257 6372342 +21 7201205 7327885 +21 7919585 7919691 +21 8211710 8211892 +21 8212412 8212570 +21 8213694 8213987 +21 8219372 8220330 +21 8234456 8234568 +21 8394767 8394902 +21 8395471 8395591 +21 8396751 8397011 +21 8445918 8446080 +21 8446629 8446729 +21 8446925 8447070 +21 8595669 8595768 +21 8844362 8844855 +21 8846669 8847382 +21 10014674 10015194 +21 10650900 12965800 +21 16645305 16645685 +21 32095835 32096215 +21 35890413 35890796 +21 44474913 44475301 +21 45376056 45376517 +22 10863370 10863448 +22 11210951 11215489 +22 11854150 11854643 +22 11856460 11857173 +22 11974159 11974336 +22 12135181 12135894 +22 12137711 12138204 +22 12691742 12694097 +22 12954427 15057495 +22 15153934 15211502 +22 15940533 16085728 +22 32894952 32895345 +22 33819338 33819538 +22 35885491 35885898 +22 36172705 36173085 +22 36177875 36178257 +22 46470112 46470493 +22 50086003 50086529 +22 50806858 50808224 +3 3571912 3572292 +3 24705149 24705529 +3 25467328 25467722 +3 29797534 29797914 +3 33548103 33548483 +3 40252107 40253916 +3 41532177 41532556 +3 43229296 43229733 +3 68658875 68659467 +3 68670345 68670734 +3 73054640 73055020 +3 82655447 82655827 +3 89588895 
89589538 +3 90269605 90722189 +3 90774880 91249595 +3 91519649 93657524 +3 93705477 93800019 +3 96475262 96475643 +3 96617014 96618680 +3 106894019 106894441 +3 106895181 106895568 +3 106896124 106896504 +3 106898661 106899022 +3 106899753 106900122 +3 106901799 106902741 +3 106903188 106903605 +3 119947198 119947578 +3 120721858 120722610 +3 122688557 122688938 +3 125982519 125982900 +3 127005357 127005745 +3 128988979 128989359 +3 137095968 137096348 +3 142662232 142662612 +3 152919604 152919995 +3 153658704 153659087 +3 160947473 160948127 +3 166159726 166160108 +3 166160260 166160644 +3 166161631 166162087 +3 166226563 166226945 +3 166232406 166232886 +3 166232970 166233355 +3 166474023 166474223 +3 171534313 171534700 +3 177010776 177011156 +3 192880587 192880967 +4 5404508 5404897 +4 12640142 12640815 +4 14506099 14506467 +4 17061824 17062213 +4 18949310 18949691 +4 22502173 22502553 +4 25717756 25718136 +4 25718275 25718655 +4 25719398 25719626 +4 27730251 27730747 +4 30884524 30884906 +4 32280109 32280489 +4 41023064 41023448 +4 47772100 47772544 +4 49136056 49136102 +4 49141052 49141147 +4 49246355 49246848 +4 49548607 49549100 +4 49631231 49658125 +4 49708086 51743949 +4 51793952 51817249 +4 55327979 55328462 +4 64606369 64606752 +4 64606841 64607360 +4 64607395 64607789 +4 64607976 64608801 +4 64608937 64609326 +4 64609811 64610876 +4 64611176 64611617 +4 66065193 66065631 +4 68050141 68050521 +4 68572333 68572774 +4 78008402 78008882 +4 83383282 83383662 +4 89731703 89732163 +4 92701787 92702300 +4 107501924 107502304 +4 112372589 112372969 +4 116296652 116297040 +4 116297165 116297545 +4 116297659 116298726 +4 116299003 116300416 +4 128081280 128081956 +4 140929567 140929947 +4 143017907 143018107 +4 143347973 143348354 +4 144379497 144379877 +4 155076906 155077288 +4 155452733 155452935 +4 155453928 155454313 +4 155454407 155455447 +4 155455566 155455766 +4 155457624 155458008 +4 155459547 155459747 +4 155460171 155460553 +4 155461093 155461689 +4 
155462078 155463456 +4 155463701 155464839 +4 155464895 155465305 +4 155465580 155466624 +4 157628391 157628774 +4 160044429 160044815 +4 161449477 161449857 +4 161788291 161788671 +4 162421207 162421721 +4 172036714 172037094 +4 179069259 179069639 +4 183489243 183489623 +4 189844495 189844576 +5 12284 12523 +5 12952 13361 +5 5395563 5395943 +5 5396182 5396616 +5 5396675 5397057 +5 8619083 8619464 +5 8619927 8620307 +5 8620707 8621192 +5 8621953 8622333 +5 8622354 8622753 +5 32927394 32927776 +5 37164286 37164673 +5 45913363 50265419 +5 60761358 60762176 +5 66253509 66253889 +5 73775720 73776112 +5 79089860 79090240 +5 80649841 80652548 +5 94567275 94571098 +5 97678633 97679016 +5 98409947 98410327 +5 98410700 98411257 +5 99813005 99813388 +5 100045805 100055225 +5 106553187 106553689 +5 111488864 111489244 +5 119127218 119127602 +5 121030820 121031445 +5 122338658 122339042 +5 123760111 123760622 +5 123760719 123761918 +5 134923133 134928692 +5 136533606 136533986 +5 137305006 137305387 +5 152198765 152199145 +5 160600365 160600745 +5 163146853 163147234 +5 163959711 163960091 +5 164673914 164674288 +5 166530241 166530641 +5 170635389 170635774 +6 1705930 1706304 +6 3943769 3944149 +6 29454054 29454435 +6 32706020 32706850 +6 43490986 43491370 +6 54899048 54899248 +6 58554346 59830578 +6 61278527 61521106 +6 61573960 61574809 +6 72747981 72748361 +6 72799169 72799549 +6 76708390 76708770 +6 88555202 88555591 +6 91726616 91727363 +6 94446937 94447370 +6 96941571 96941951 +6 104699855 104700055 +6 114377334 114377534 +6 122764824 122765204 +6 126478329 126478709 +6 127735330 127735710 +6 132799554 132799939 +6 133150492 133150881 +6 133930809 133931190 +6 138133082 138133462 +6 143077647 143078031 +6 153666229 153666618 +6 153667363 153667744 +6 153668187 153668753 +6 153669025 153669419 +6 156547729 156548118 +6 163638068 163638448 +7 18021726 18022106 +7 22748471 22748854 +7 33749120 33749500 +7 36228567 36229008 +7 37387570 37387950 +7 45251808 45252289 +7 
55369049 55369429 +7 57167688 57168071 +7 57168472 57168852 +7 57169046 57169430 +7 57169550 57169932 +7 57170307 57170523 +7 57170675 57171410 +7 57171502 57172122 +7 57173798 57174181 +7 57174854 57175239 +7 57185615 57185995 +7 57186105 57186589 +7 57187287 57188033 +7 57188305 57188872 +7 57189116 57189730 +7 57190949 57191332 +7 57191618 57191818 +7 57192132 57192860 +7 57193489 57193872 +7 57193974 57194701 +7 57194829 57195210 +7 57196302 57197490 +7 57198263 57198644 +7 57879605 58032504 +7 58166363 62995324 +7 63094673 63095057 +7 64104133 64104513 +7 64105294 64106415 +7 64106627 64107010 +7 64108329 64108798 +7 64110007 64110707 +7 64111376 64111804 +7 64111957 64112849 +7 67627830 67628213 +7 68097607 68097990 +7 68736347 68736811 +7 69331805 69332005 +7 69332037 69332438 +7 69333013 69333393 +7 69333597 69334167 +7 72088575 72088955 +7 83100026 83100406 +7 83469984 83470184 +7 83855080 83855464 +7 95851249 95851629 +7 104989516 104989896 +7 112372484 112372865 +7 112374724 112374950 +7 117263552 117264184 +7 117264231 117264614 +7 130116678 130117058 +7 141173000 141173384 +7 141801916 141802451 +7 141802901 141803366 +7 141804074 141804274 +7 141804814 141805507 +7 142665099 142667846 +7 143187483 143187863 +7 145997159 145997608 +7 150131843 150132229 +7 153968598 153968979 +7 159294463 159294846 +8 13353292 13353679 +8 16056863 16057063 +8 18849121 18849571 +8 20551162 20551554 +8 32805708 32806092 +8 33010514 33010894 +8 33011359 33014071 +8 33014510 33014895 +8 33015020 33015853 +8 36277446 36278060 +8 36278272 36278791 +8 36278835 36279634 +8 40070431 40070867 +8 43237631 43242390 +8 43937900 45969600 +8 46827305 46827914 +8 46828298 46829961 +8 46830195 46831222 +8 46837581 46837961 +8 46838101 46838484 +8 50758259 50758639 +8 56736733 56736933 +8 61303079 61303460 +8 67580689 67581493 +8 67581588 67581972 +8 67582178 67582568 +8 67585216 67585693 +8 67585787 67586175 +8 67587282 67587922 +8 69102851 69103234 +8 72985528 72985923 +8 74828644 
74829025 +8 76201592 76202319 +8 76645407 76645800 +8 97907908 97908279 +8 99495689 99496133 +8 102774315 102774695 +8 103082925 103083379 +8 103083704 103084399 +8 103084730 103085110 +8 103085323 103085806 +8 103086859 103087242 +8 108533901 108534281 +8 110933150 110933533 +8 110934510 110935010 +8 111248936 111249316 +8 120224204 120224584 +8 127053876 127054257 +8 127968653 127969034 +8 133615761 133616142 +8 133755390 133755856 +9 5091131 5091511 +9 5091962 5093013 +9 5093063 5094123 +9 5094192 5094697 +9 5094931 5095816 +9 5096206 5096816 +9 5097188 5097890 +9 5098134 5098516 +9 5099352 5099552 +9 5100044 5100427 +9 5108063 5108592 +9 5109193 5109986 +9 5110030 5110411 +9 9896970 9897350 +9 15866612 15866992 +9 18336471 18336854 +9 31498260 31498640 +9 33656533 33658316 +9 33658346 33659299 +9 34998988 34999474 +9 36466192 36466572 +9 43153721 45525161 +9 64045550 64046043 +9 64047855 64048422 +9 65048153 65079624 +9 68251002 68251071 +9 72788174 72788555 +9 78741395 78741775 +9 78742155 78742969 +9 78743199 78743630 +9 78744108 78744492 +9 78810721 78811113 +9 79804550 79804933 +9 80564643 80565085 +9 80565478 80565941 +9 81747641 81748021 +9 82427689 82428071 +9 92108965 92109347 +9 92539106 92539763 +9 95876956 95877338 +9 117109914 117110296 +9 122505687 122506067 +9 129878699 129879081 +9 134164478 134165354 +9 134170819 134171060 +X 4059512 4059712 +X 5168678 5169232 +X 5169733 5170646 +X 15727702 15728089 +X 17116414 17116794 +X 24056083 24056470 +X 24375345 24375545 +X 33762401 33762781 +X 55178596 55179289 +X 55179434 55180459 +X 55181196 55182790 +X 55183051 55184112 +X 58061543 62821716 +X 62841379 62841765 +X 62842257 62842639 +X 70119464 70119845 +X 70127233 70127620 +X 77501934 77502314 +X 78561721 78561921 +X 84403779 84404168 +X 100027094 100027475 +X 102010329 102010712 +X 102011531 102011915 +X 102772405 102772791 +X 102785904 102786287 +X 102798001 102798386 +X 102802747 102803161 +X 102809395 102809788 +X 104409869 104410249 +X 106239694 
106239894 +X 111416893 111417294 +X 126471558 126473451 +X 126728884 126729272 +X 126729326 126729709 +X 126729837 126730217 +X 126730716 126731106 +X 126731624 126732029 +X 129983338 129983538 +X 133041871 133042251 +X 135292293 135292493 +X 143430213 143430837 +X 143431144 143431537 +X 143431716 143432219 +X 143432410 143433212 +X 143433510 143434156 +X 143543636 143544023 +X 146995842 146996224 +Y 4344757 4344879 +Y 9141870 9141995 +Y 10203380 10266932 +Y 10316749 10544446 +Y 10594583 10626838 +Y 10663669 10663716 +Y 10744417 10921497 +Y 11290797 11334278 +Y 11493053 11592850 +Y 11671014 11671046 +Y 11721528 11749472 +Y 56694632 56889743 diff --git a/assets/blacklists/v3.0/hg38-blacklist.v3.bed b/assets/blacklists/v3.0/hg38-blacklist.v3.bed new file mode 100644 index 000000000..4e386f455 --- /dev/null +++ b/assets/blacklists/v3.0/hg38-blacklist.v3.bed @@ -0,0 +1,910 @@ +chr1 628903 635104 +chr1 5850087 5850571 +chr1 8909610 8910014 +chr1 9574580 9574997 +chr1 32043823 32044203 +chr1 33818964 33819344 +chr1 38674335 38674715 +chr1 50017081 50017546 +chr1 52996949 52997329 +chr1 55372488 55372869 +chr1 67971776 67972156 +chr1 73258720 73259100 +chr1 76971068 76971595 +chr1 93936365 93936747 +chr1 93937447 93937827 +chr1 102160407 102160787 +chr1 103620975 103621378 +chr1 106803432 106803816 +chr1 106804021 106804224 +chr1 106804753 106805343 +chr1 121609948 125063427 +chr1 125166231 125184683 +chr1 143184599 143276861 +chr1 146992422 146992802 +chr1 158449073 158449453 +chr1 158872114 158872494 +chr1 159295111 159295493 +chr1 169473895 169474338 +chr1 170006204 170006584 +chr1 172710350 172710732 +chr1 181422611 181423158 +chr1 191961694 191962163 +chr1 195288048 195288429 +chr1 199487949 199488149 +chr1 214709795 214710175 +chr1 215499615 215500014 +chr1 226652017 226652398 +chr1 227699752 227700133 +chr1 229019365 229019745 +chr1 233139985 233140365 +chr1 235520204 235520404 +chr1 235537405 235537785 +chr1 235538899 235540112 +chr1 235540243 235540623 +chr1 
235540886 235541649 +chr1 235870625 235871005 +chr1 237940595 237940979 +chr1 237941045 237941514 +chr1 237941893 237942746 +chr1 237943028 237943416 +chr1 237943490 237945232 +chr1 237945285 237946507 +chr1 237948983 237949365 +chr1 237951294 237951802 +chr10 2235555 2235756 +chr10 19746628 19747247 +chr10 19747314 19748342 +chr10 25638376 25638756 +chr10 26873147 26873538 +chr10 30565118 30565501 +chr10 36432964 36433344 +chr10 36434047 36435188 +chr10 37600616 37601002 +chr10 37601246 37601787 +chr10 37601884 37602850 +chr10 38481300 38596500 +chr10 38782600 38967900 +chr10 39000365 41916630 +chr10 42066792 42104971 +chr10 45577925 45578305 +chr10 46706229 46706611 +chr10 47633790 47634172 +chr10 55597861 55600059 +chr10 55626794 55627174 +chr10 57668682 57669062 +chr10 59261793 59262173 +chr10 69590538 69590738 +chr10 69591475 69591858 +chr10 69592355 69592740 +chr10 69592776 69593482 +chr10 69594378 69594760 +chr10 69595141 69595573 +chr10 69595681 69596061 +chr10 77166388 77166768 +chr10 79411056 79411468 +chr10 89786504 89786889 +chr10 100057235 100058064 +chr10 112894488 112894870 +chr10 115056512 115056712 +chr10 123032371 123032751 +chr10 125819621 125820001 +chr10 133689373 133689523 +chr11 8023287 8023667 +chr11 10507706 10510499 +chr11 10768339 10768719 +chr11 10815184 10815384 +chr11 24839563 24839944 +chr11 27850562 27850942 +chr11 47323881 47324333 +chr11 50424039 50813393 +chr11 51081363 54424064 +chr11 64187168 64187556 +chr11 65069483 65069863 +chr11 73510500 73510992 +chr11 81551734 81551934 +chr11 81553835 81554282 +chr11 81556152 81556537 +chr11 81556717 81557101 +chr11 87813427 87814320 +chr11 87815683 87816063 +chr11 103270627 103271007 +chr11 103403270 103403650 +chr11 103404014 103404527 +chr11 103404779 103405289 +chr11 103405809 103406376 +chr11 103406653 103407036 +chr11 103407110 103407310 +chr11 103408089 103409893 +chr11 103410074 103411211 +chr11 110876919 110877308 +chr11 114021166 114021546 +chr11 123003425 123003857 +chr11 
123139919 123140301 +chr12 9923 10481 +chr12 2539174 2539982 +chr12 3887955 3888335 +chr12 19795477 19795864 +chr12 20769413 20769432 +chr12 21052950 21053330 +chr12 22005655 22006093 +chr12 27925108 27925488 +chr12 31247541 31247923 +chr12 31247963 31248343 +chr12 34665177 37429869 +chr12 40286245 40286625 +chr12 41363462 41363903 +chr12 41698591 41698971 +chr12 41699048 41699573 +chr12 49817252 49817634 +chr12 62773865 62774257 +chr12 80623983 80624183 +chr12 101486970 101487350 +chr12 123053921 123054301 +chr12 126583199 126583772 +chr12 126584137 126584530 +chr12 130315425 130315904 +chr13 16226300 18171400 +chr13 25984718 25984918 +chr13 31866923 31867303 +chr13 33516898 33517278 +chr13 36065385 36065836 +chr13 40768206 40768595 +chr13 53891451 53891831 +chr13 55971453 55971922 +chr13 56688341 56688749 +chr13 72344211 72344591 +chr13 75592084 75592468 +chr13 83688313 83688693 +chr13 84521524 84522274 +chr13 84522848 84523233 +chr13 88308157 88308357 +chr13 95692549 95692935 +chr13 95693013 95693215 +chr13 95694449 95695698 +chr13 95696145 95696512 +chr13 105488067 105488448 +chr13 107058662 107059042 +chr13 109423944 109424560 +chr14 16000600 18173660 +chr14 23426306 23426691 +chr14 32483953 32485298 +chr14 37490106 37490486 +chr14 40643840 40644220 +chr14 43116742 43117122 +chr14 45238635 45239016 +chr14 45430378 45430758 +chr14 46048457 46048837 +chr14 46847040 46847420 +chr14 51587295 51587847 +chr14 83587331 83587894 +chr14 83588229 83589060 +chr14 84171262 84171729 +chr14 84171838 84172846 +chr14 84173508 84173969 +chr14 84174279 84174691 +chr14 86498937 86499317 +chr15 17058500 19838644 +chr15 30477565 30477945 +chr15 32529779 32530159 +chr15 34715310 34715692 +chr15 35396110 35396495 +chr15 40133887 40134759 +chr15 41157028 41157408 +chr15 52100391 52100771 +chr15 54583731 54584111 +chr15 58152409 58153114 +chr15 58153292 58153690 +chr15 58155859 58156155 +chr15 67040730 67041122 +chr15 91960163 91960543 +chr16 3367430 3368546 +chr16 3369658 3370039 
+chr16 3370150 3370542 +chr16 3370932 3371445 +chr16 3371688 3372222 +chr16 10719290 10720105 +chr16 10720417 10720781 +chr16 10721235 10721874 +chr16 10721900 10722280 +chr16 10723423 10723623 +chr16 10723815 10724200 +chr16 10724415 10724654 +chr16 20720929 20721312 +chr16 20721365 20721746 +chr16 20722103 20722552 +chr16 34071571 34071629 +chr16 34131996 34289269 +chr16 34571482 34597852 +chr16 34661168 34661267 +chr16 34919141 34919184 +chr16 35966577 38269112 +chr16 38275767 38280684 +chr16 46380676 46381095 +chr16 46386376 46386491 +chr16 46388622 46389053 +chr16 46390180 46390788 +chr16 46394471 46395088 +chr16 46398828 46401647 +chr16 60470624 60471006 +chr16 65701465 65701846 +chr16 67590312 67590692 +chr16 69358523 69358990 +chr16 73161120 73161500 +chr16 82119745 82120125 +chr17 141682 142062 +chr17 14171308 14171688 +chr17 15568187 15568567 +chr17 19597515 19597985 +chr17 19598613 19599532 +chr17 19599799 19600210 +chr17 19600300 19602064 +chr17 19602160 19602545 +chr17 19602886 19603595 +chr17 19603847 19604047 +chr17 19604922 19605588 +chr17 20851029 20851409 +chr17 21851150 21992060 +chr17 22519042 22520149 +chr17 22520322 22521025 +chr17 22521116 22526407 +chr17 22526636 22530152 +chr17 22530381 22532156 +chr17 22532315 22532940 +chr17 22551066 22551446 +chr17 22813591 26716670 +chr17 26885752 26885795 +chr17 35654769 35655182 +chr17 43251640 43251763 +chr17 43309853 43310048 +chr17 43315021 43316491 +chr17 43997535 43997957 +chr17 53105552 53106565 +chr17 54902920 54903301 +chr17 59279406 59279787 +chr17 63076394 63076777 +chr17 63393238 63393438 +chr17 65555244 65555624 +chr17 72316258 72316638 +chr17 80617407 80617802 +chr18 2842087 2842534 +chr18 8103913 8104113 +chr18 8846332 8846713 +chr18 15457976 20865732 +chr18 34571460 34571840 +chr18 47853089 47853617 +chr18 52883627 52884007 +chr18 59288306 59288686 +chr18 61874562 61874960 +chr18 77455900 77456280 +chr19 246899 247452 +chr19 12105016 12105399 +chr19 13362989 13363369 +chr19 24182199 
27257542 +chr19 27741787 27741868 +chr19 36271917 36272148 +chr19 37572465 37572846 +chr19 37576134 37576516 +chr19 46122944 46123324 +chr19 47941356 47941426 +chr19 54794749 54795129 +chr19 56691535 56691736 +chr19 56922158 56922601 +chr2 638427 638808 +chr2 1087103 1087484 +chr2 16271753 16272134 +chr2 22316878 22317258 +chr2 24644617 24644997 +chr2 32916201 32916632 +chr2 33767290 33767703 +chr2 33964664 33965045 +chr2 36276769 36277149 +chr2 40784787 40785278 +chr2 49229452 49230058 +chr2 50588765 50589566 +chr2 54451654 54452034 +chr2 57648677 57649057 +chr2 67953669 67954049 +chr2 75063567 75063994 +chr2 81666317 81666849 +chr2 82814941 82815321 +chr2 82815451 82816236 +chr2 82816261 82816647 +chr2 82818378 82818748 +chr2 82820800 82821005 +chr2 85068666 85069046 +chr2 87824709 87825530 +chr2 89272789 89273133 +chr2 89827607 89827706 +chr2 89828636 89828710 +chr2 89828842 89828942 +chr2 89833685 89833793 +chr2 89839592 89839709 +chr2 89909317 89909789 +chr2 90379778 90402456 +chr2 92081223 92081398 +chr2 92188125 94293463 +chr2 94499181 94570956 +chr2 94898976 94899645 +chr2 94900639 94900840 +chr2 94901421 94901808 +chr2 97189431 97189813 +chr2 102482582 102482962 +chr2 102505606 102505987 +chr2 110072034 110072434 +chr2 110299106 110299346 +chr2 116751234 116751614 +chr2 116752004 116752448 +chr2 116752517 116752897 +chr2 117020171 117020552 +chr2 117021107 117022152 +chr2 117022438 117024038 +chr2 117024277 117025093 +chr2 117025205 117025670 +chr2 117026130 117026512 +chr2 120211535 120212064 +chr2 120212685 120213069 +chr2 120213761 120214143 +chr2 120214590 120215370 +chr2 121220135 121220515 +chr2 124680743 124681182 +chr2 125812046 125812548 +chr2 129090774 129091154 +chr2 130272174 130272615 +chr2 130273451 130273981 +chr2 130274326 130274992 +chr2 130275174 130275744 +chr2 130276119 130276500 +chr2 130277774 130278727 +chr2 130279995 130280729 +chr2 130280827 130281440 +chr2 130557359 130557607 +chr2 130563142 130563396 +chr2 131369643 131369925 
+chr2 131370949 131371562 +chr2 131371916 131372361 +chr2 131372758 131373137 +chr2 131379317 131380344 +chr2 131381592 131381973 +chr2 131382344 131382728 +chr2 131382772 131382974 +chr2 131383079 131384016 +chr2 131384051 131384621 +chr2 131384898 131385281 +chr2 131385356 131385794 +chr2 140217229 140218044 +chr2 140220209 140220840 +chr2 140220940 140221140 +chr2 140221198 140222369 +chr2 140222545 140223623 +chr2 140223647 140224297 +chr2 143088644 143089042 +chr2 143089938 143090358 +chr2 143090898 143091662 +chr2 143092255 143092646 +chr2 143093556 143093941 +chr2 143094515 143094999 +chr2 143095614 143095994 +chr2 143096048 143096428 +chr2 143096470 143097336 +chr2 143097466 143097981 +chr2 143100621 143101005 +chr2 147048574 147048955 +chr2 147244849 147245229 +chr2 147265034 147265432 +chr2 148822913 148823295 +chr2 148881545 148882032 +chr2 155196092 155196473 +chr2 155263345 155264313 +chr2 155264362 155264562 +chr2 155264599 155264982 +chr2 155311420 155311995 +chr2 155313539 155313922 +chr2 156828628 156829008 +chr2 162517271 162517651 +chr2 164117001 164117382 +chr2 166414323 166414779 +chr2 167378863 167379244 +chr2 168652433 168652813 +chr2 179739184 179739689 +chr2 190593881 190594262 +chr2 196204680 196205060 +chr2 201212170 201212612 +chr2 201212648 201212854 +chr2 201212903 201213386 +chr2 201214659 201215040 +chr2 201549404 201549784 +chr2 201550130 201550513 +chr2 201557568 201557948 +chr2 202614117 202614527 +chr2 202615371 202615757 +chr2 202617016 202617398 +chr2 202618435 202618819 +chr2 202619754 202620134 +chr2 211773627 211774158 +chr2 211774322 211775192 +chr2 211775641 211776712 +chr2 211777034 211777417 +chr2 211777802 211778269 +chr2 211778916 211779562 +chr2 215573163 215573544 +chr2 226722088 226722596 +chr2 237521663 237522775 +chr2 237522862 237523652 +chr20 5999469 5999849 +chr20 9168743 9169145 +chr20 10441916 10442296 +chr20 13167142 13167534 +chr20 18449173 18449556 +chr20 22078162 22078542 +chr20 24024376 24024757 +chr20 
26438448 28554562 +chr20 28644084 29015573 +chr20 29125977 29294639 +chr20 30744370 30744939 +chr20 30746748 30747241 +chr20 31051540 31106909 +chr20 31157044 31159116 +chr20 31161652 31223331 +chr20 34688743 34689039 +chr20 47894699 47896109 +chr20 57063873 57064279 +chr20 57357555 57358134 +chr20 57358221 57359428 +chr20 57359451 57360972 +chr20 63644937 63645318 +chr21 6369257 6372342 +chr21 7201205 7327885 +chr21 7919585 7919691 +chr21 8211710 8211892 +chr21 8212412 8212570 +chr21 8213694 8213987 +chr21 8219372 8220330 +chr21 8234456 8234568 +chr21 8394767 8394902 +chr21 8395471 8395591 +chr21 8396751 8397011 +chr21 8445918 8446080 +chr21 8446629 8446729 +chr21 8446925 8447070 +chr21 8595669 8595768 +chr21 8844362 8844855 +chr21 8846669 8847382 +chr21 10014674 10015194 +chr21 10650900 12965800 +chr21 16645305 16645685 +chr21 32095835 32096215 +chr21 35890413 35890796 +chr21 44474913 44475301 +chr21 45376056 45376517 +chr22 10863370 10863448 +chr22 11210951 11215489 +chr22 11854150 11854643 +chr22 11856460 11857173 +chr22 11974159 11974336 +chr22 12135181 12135894 +chr22 12137711 12138204 +chr22 12691742 12694097 +chr22 12954427 15057495 +chr22 15153934 15211502 +chr22 15940533 16085728 +chr22 32894952 32895345 +chr22 33819338 33819538 +chr22 35885491 35885898 +chr22 36172705 36173085 +chr22 36177875 36178257 +chr22 46470112 46470493 +chr22 50086003 50086529 +chr22 50806858 50808224 +chr3 3571912 3572292 +chr3 24705149 24705529 +chr3 25467328 25467722 +chr3 29797534 29797914 +chr3 33548103 33548483 +chr3 40252107 40253916 +chr3 41532177 41532556 +chr3 43229296 43229733 +chr3 68658875 68659467 +chr3 68670345 68670734 +chr3 73054640 73055020 +chr3 82655447 82655827 +chr3 89588895 89589538 +chr3 90269605 90722189 +chr3 90774880 91249595 +chr3 91519649 93657524 +chr3 93705477 93800019 +chr3 96475262 96475643 +chr3 96617014 96618680 +chr3 106894019 106894441 +chr3 106895181 106895568 +chr3 106896124 106896504 +chr3 106898661 106899022 +chr3 106899753 106900122 +chr3 
106901799 106902741 +chr3 106903188 106903605 +chr3 119947198 119947578 +chr3 120721858 120722610 +chr3 122688557 122688938 +chr3 125982519 125982900 +chr3 127005357 127005745 +chr3 128988979 128989359 +chr3 137095968 137096348 +chr3 142662232 142662612 +chr3 152919604 152919995 +chr3 153658704 153659087 +chr3 160947473 160948127 +chr3 166159726 166160108 +chr3 166160260 166160644 +chr3 166161631 166162087 +chr3 166226563 166226945 +chr3 166232406 166232886 +chr3 166232970 166233355 +chr3 166474023 166474223 +chr3 171534313 171534700 +chr3 177010776 177011156 +chr3 192880587 192880967 +chr4 5404508 5404897 +chr4 12640142 12640815 +chr4 14506099 14506467 +chr4 17061824 17062213 +chr4 18949310 18949691 +chr4 22502173 22502553 +chr4 25717756 25718136 +chr4 25718275 25718655 +chr4 25719398 25719626 +chr4 27730251 27730747 +chr4 30884524 30884906 +chr4 32280109 32280489 +chr4 41023064 41023448 +chr4 47772100 47772544 +chr4 49136056 49136102 +chr4 49141052 49141147 +chr4 49246355 49246848 +chr4 49548607 49549100 +chr4 49631231 49658125 +chr4 49708086 51743949 +chr4 51793952 51817249 +chr4 55327979 55328462 +chr4 64606369 64606752 +chr4 64606841 64607360 +chr4 64607395 64607789 +chr4 64607976 64608801 +chr4 64608937 64609326 +chr4 64609811 64610876 +chr4 64611176 64611617 +chr4 66065193 66065631 +chr4 68050141 68050521 +chr4 68572333 68572774 +chr4 78008402 78008882 +chr4 83383282 83383662 +chr4 89731703 89732163 +chr4 92701787 92702300 +chr4 107501924 107502304 +chr4 112372589 112372969 +chr4 116296652 116297040 +chr4 116297165 116297545 +chr4 116297659 116298726 +chr4 116299003 116300416 +chr4 128081280 128081956 +chr4 140929567 140929947 +chr4 143017907 143018107 +chr4 143347973 143348354 +chr4 144379497 144379877 +chr4 155076906 155077288 +chr4 155452733 155452935 +chr4 155453928 155454313 +chr4 155454407 155455447 +chr4 155455566 155455766 +chr4 155457624 155458008 +chr4 155459547 155459747 +chr4 155460171 155460553 +chr4 155461093 155461689 +chr4 155462078 155463456 
+chr4 155463701 155464839 +chr4 155464895 155465305 +chr4 155465580 155466624 +chr4 157628391 157628774 +chr4 160044429 160044815 +chr4 161449477 161449857 +chr4 161788291 161788671 +chr4 162421207 162421721 +chr4 172036714 172037094 +chr4 179069259 179069639 +chr4 183489243 183489623 +chr4 189844495 189844576 +chr5 12284 12523 +chr5 12952 13361 +chr5 5395563 5395943 +chr5 5396182 5396616 +chr5 5396675 5397057 +chr5 8619083 8619464 +chr5 8619927 8620307 +chr5 8620707 8621192 +chr5 8621953 8622333 +chr5 8622354 8622753 +chr5 32927394 32927776 +chr5 37164286 37164673 +chr5 45913363 50265419 +chr5 60761358 60762176 +chr5 66253509 66253889 +chr5 73775720 73776112 +chr5 79089860 79090240 +chr5 80649841 80652548 +chr5 94567275 94571098 +chr5 97678633 97679016 +chr5 98409947 98410327 +chr5 98410700 98411257 +chr5 99813005 99813388 +chr5 100045805 100055225 +chr5 106553187 106553689 +chr5 111488864 111489244 +chr5 119127218 119127602 +chr5 121030820 121031445 +chr5 122338658 122339042 +chr5 123760111 123760622 +chr5 123760719 123761918 +chr5 134923133 134928692 +chr5 136533606 136533986 +chr5 137305006 137305387 +chr5 152198765 152199145 +chr5 160600365 160600745 +chr5 163146853 163147234 +chr5 163959711 163960091 +chr5 164673914 164674288 +chr5 166530241 166530641 +chr5 170635389 170635774 +chr6 1705930 1706304 +chr6 3943769 3944149 +chr6 29454054 29454435 +chr6 32706020 32706850 +chr6 43490986 43491370 +chr6 54899048 54899248 +chr6 58554346 59830578 +chr6 61278527 61521106 +chr6 61573960 61574809 +chr6 72747981 72748361 +chr6 72799169 72799549 +chr6 76708390 76708770 +chr6 88555202 88555591 +chr6 91726616 91727363 +chr6 94446937 94447370 +chr6 96941571 96941951 +chr6 104699855 104700055 +chr6 114377334 114377534 +chr6 122764824 122765204 +chr6 126478329 126478709 +chr6 127735330 127735710 +chr6 132799554 132799939 +chr6 133150492 133150881 +chr6 133930809 133931190 +chr6 138133082 138133462 +chr6 143077647 143078031 +chr6 153666229 153666618 +chr6 153667363 153667744 
+chr6 153668187 153668753 +chr6 153669025 153669419 +chr6 156547729 156548118 +chr6 163638068 163638448 +chr7 18021726 18022106 +chr7 22748471 22748854 +chr7 33749120 33749500 +chr7 36228567 36229008 +chr7 37387570 37387950 +chr7 45251808 45252289 +chr7 55369049 55369429 +chr7 57167688 57168071 +chr7 57168472 57168852 +chr7 57169046 57169430 +chr7 57169550 57169932 +chr7 57170307 57170523 +chr7 57170675 57171410 +chr7 57171502 57172122 +chr7 57173798 57174181 +chr7 57174854 57175239 +chr7 57185615 57185995 +chr7 57186105 57186589 +chr7 57187287 57188033 +chr7 57188305 57188872 +chr7 57189116 57189730 +chr7 57190949 57191332 +chr7 57191618 57191818 +chr7 57192132 57192860 +chr7 57193489 57193872 +chr7 57193974 57194701 +chr7 57194829 57195210 +chr7 57196302 57197490 +chr7 57198263 57198644 +chr7 57879605 58032504 +chr7 58166363 62995324 +chr7 63094673 63095057 +chr7 64104133 64104513 +chr7 64105294 64106415 +chr7 64106627 64107010 +chr7 64108329 64108798 +chr7 64110007 64110707 +chr7 64111376 64111804 +chr7 64111957 64112849 +chr7 67627830 67628213 +chr7 68097607 68097990 +chr7 68736347 68736811 +chr7 69331805 69332005 +chr7 69332037 69332438 +chr7 69333013 69333393 +chr7 69333597 69334167 +chr7 72088575 72088955 +chr7 83100026 83100406 +chr7 83469984 83470184 +chr7 83855080 83855464 +chr7 95851249 95851629 +chr7 104989516 104989896 +chr7 112372484 112372865 +chr7 112374724 112374950 +chr7 117263552 117264184 +chr7 117264231 117264614 +chr7 130116678 130117058 +chr7 141173000 141173384 +chr7 141801916 141802451 +chr7 141802901 141803366 +chr7 141804074 141804274 +chr7 141804814 141805507 +chr7 142665099 142667846 +chr7 143187483 143187863 +chr7 145997159 145997608 +chr7 150131843 150132229 +chr7 153968598 153968979 +chr7 159294463 159294846 +chr8 13353292 13353679 +chr8 16056863 16057063 +chr8 18849121 18849571 +chr8 20551162 20551554 +chr8 32805708 32806092 +chr8 33010514 33010894 +chr8 33011359 33014071 +chr8 33014510 33014895 +chr8 33015020 33015853 +chr8 
36277446 36278060 +chr8 36278272 36278791 +chr8 36278835 36279634 +chr8 40070431 40070867 +chr8 43237631 43242390 +chr8 43937900 45969600 +chr8 46827305 46827914 +chr8 46828298 46829961 +chr8 46830195 46831222 +chr8 46837581 46837961 +chr8 46838101 46838484 +chr8 50758259 50758639 +chr8 56736733 56736933 +chr8 61303079 61303460 +chr8 67580689 67581493 +chr8 67581588 67581972 +chr8 67582178 67582568 +chr8 67585216 67585693 +chr8 67585787 67586175 +chr8 67587282 67587922 +chr8 69102851 69103234 +chr8 72985528 72985923 +chr8 74828644 74829025 +chr8 76201592 76202319 +chr8 76645407 76645800 +chr8 97907908 97908279 +chr8 99495689 99496133 +chr8 102774315 102774695 +chr8 103082925 103083379 +chr8 103083704 103084399 +chr8 103084730 103085110 +chr8 103085323 103085806 +chr8 103086859 103087242 +chr8 108533901 108534281 +chr8 110933150 110933533 +chr8 110934510 110935010 +chr8 111248936 111249316 +chr8 120224204 120224584 +chr8 127053876 127054257 +chr8 127968653 127969034 +chr8 133615761 133616142 +chr8 133755390 133755856 +chr9 5091131 5091511 +chr9 5091962 5093013 +chr9 5093063 5094123 +chr9 5094192 5094697 +chr9 5094931 5095816 +chr9 5096206 5096816 +chr9 5097188 5097890 +chr9 5098134 5098516 +chr9 5099352 5099552 +chr9 5100044 5100427 +chr9 5108063 5108592 +chr9 5109193 5109986 +chr9 5110030 5110411 +chr9 9896970 9897350 +chr9 15866612 15866992 +chr9 18336471 18336854 +chr9 31498260 31498640 +chr9 33656533 33658316 +chr9 33658346 33659299 +chr9 34998988 34999474 +chr9 36466192 36466572 +chr9 43153721 45525161 +chr9 64045550 64046043 +chr9 64047855 64048422 +chr9 65048153 65079624 +chr9 68251002 68251071 +chr9 72788174 72788555 +chr9 78741395 78741775 +chr9 78742155 78742969 +chr9 78743199 78743630 +chr9 78744108 78744492 +chr9 78810721 78811113 +chr9 79804550 79804933 +chr9 80564643 80565085 +chr9 80565478 80565941 +chr9 81747641 81748021 +chr9 82427689 82428071 +chr9 92108965 92109347 +chr9 92539106 92539763 +chr9 95876956 95877338 +chr9 117109914 117110296 +chr9 
122505687 122506067 +chr9 129878699 129879081 +chr9 134164478 134165354 +chr9 134170819 134171060 +chrX 4059512 4059712 +chrX 5168678 5169232 +chrX 5169733 5170646 +chrX 15727702 15728089 +chrX 17116414 17116794 +chrX 24056083 24056470 +chrX 24375345 24375545 +chrX 33762401 33762781 +chrX 55178596 55179289 +chrX 55179434 55180459 +chrX 55181196 55182790 +chrX 55183051 55184112 +chrX 58061543 62821716 +chrX 62841379 62841765 +chrX 62842257 62842639 +chrX 70119464 70119845 +chrX 70127233 70127620 +chrX 77501934 77502314 +chrX 78561721 78561921 +chrX 84403779 84404168 +chrX 100027094 100027475 +chrX 102010329 102010712 +chrX 102011531 102011915 +chrX 102772405 102772791 +chrX 102785904 102786287 +chrX 102798001 102798386 +chrX 102802747 102803161 +chrX 102809395 102809788 +chrX 104409869 104410249 +chrX 106239694 106239894 +chrX 111416893 111417294 +chrX 126471558 126473451 +chrX 126728884 126729272 +chrX 126729326 126729709 +chrX 126729837 126730217 +chrX 126730716 126731106 +chrX 126731624 126732029 +chrX 129983338 129983538 +chrX 133041871 133042251 +chrX 135292293 135292493 +chrX 143430213 143430837 +chrX 143431144 143431537 +chrX 143431716 143432219 +chrX 143432410 143433212 +chrX 143433510 143434156 +chrX 143543636 143544023 +chrX 146995842 146996224 +chrY 4344757 4344879 +chrY 9141870 9141995 +chrY 10203380 10266932 +chrY 10316749 10544446 +chrY 10594583 10626838 +chrY 10663669 10663716 +chrY 10744417 10921497 +chrY 11290797 11334278 +chrY 11493053 11592850 +chrY 11671014 11671046 +chrY 11721528 11749472 +chrY 56694632 56889743 diff --git a/assets/design_pe.csv b/assets/design_pe.csv deleted file mode 100644 index 6e6eae735..000000000 --- a/assets/design_pe.csv +++ /dev/null @@ -1,21 +0,0 @@ -group,replicate,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP,1,BLA203A1_S27_L006_R1_001.fastq.gz,BLA203A1_S27_L006_R2_001.fastq.gz,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L001_R1_001.fastq.gz,BLA203A25_S16_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT 
-WT_BCATENIN_IP,2,BLA203A25_S16_L002_R1_001.fastq.gz,BLA203A25_S16_L002_R2_001.fastq.gz,BCATENIN,WT_INPUT -WT_BCATENIN_IP,3,BLA203A49_S40_L001_R1_001.fastq.gz,BLA203A49_S40_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP,1,BLA203A7_S60_L001_R1_001.fastq.gz,BLA203A7_S60_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L001_R1_001.fastq.gz,BLA203A43_S34_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L002_R1_001.fastq.gz,BLA203A43_S34_L002_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,3,BLA203A64_S55_L001_R1_001.fastq.gz,BLA203A64_S55_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT -WT_TCF4_IP,1,BLA203A3_S29_L006_R1_001.fastq.gz,BLA203A3_S29_L006_R2_001.fastq.gz,TCF4,WT_INPUT -WT_TCF4_IP,2,BLA203A27_S18_L001_R1_001.fastq.gz,BLA203A27_S18_L001_R2_001.fastq.gz,TCF4,WT_INPUT -WT_TCF4_IP,3,BLA203A51_S42_L001_R1_001.fastq.gz,BLA203A51_S42_L001_R2_001.fastq.gz,TCF4,WT_INPUT -NAIVE_TCF4_IP,1,BLA203A9_S62_L001_R1_001.fastq.gz,BLA203A9_S62_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,2,BLA203A45_S36_L001_R1_001.fastq.gz,BLA203A45_S36_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,3,BLA203A66_S57_L001_R1_001.fastq.gz,BLA203A66_S57_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT -WT_INPUT,1,BLA203A6_S32_L006_R1_001.fastq.gz,BLA203A6_S32_L006_R2_001.fastq.gz,, -WT_INPUT,2,BLA203A30_S21_L001_R1_001.fastq.gz,BLA203A30_S21_L001_R2_001.fastq.gz,, -WT_INPUT,3,BLA203A31_S21_L003_R1_001.fastq.gz,BLA203A31_S21_L003_R2_001.fastq.gz,, -NAIVE_INPUT,1,BLA203A12_S3_L001_R1_001.fastq.gz,BLA203A12_S3_L001_R2_001.fastq.gz,, -NAIVE_INPUT,2,BLA203A48_S39_L001_R1_001.fastq.gz,BLA203A48_S39_L001_R2_001.fastq.gz,, -NAIVE_INPUT,3,BLA203A49_S1_L006_R1_001.fastq.gz,BLA203A49_S1_L006_R2_001.fastq.gz,, diff --git a/assets/design_se.csv b/assets/design_se.csv deleted file mode 100644 index 069a5c943..000000000 --- a/assets/design_se.csv +++ /dev/null @@ -1,21 +0,0 @@ -group,replicate,fastq_1,fastq_2,antibody,control 
-WT_BCATENIN_IP,1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP,1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -WT_TCF4_IP,1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP,2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP,3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -NAIVE_TCF4_IP,1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -WT_INPUT,1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT,2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT,3,BLA203A31_S21_L003_R1_001.fastq.gz,,, -NAIVE_INPUT,1,BLA203A12_S3_L001_R1_001.fastq.gz,,, -NAIVE_INPUT,2,BLA203A48_S39_L001_R1_001.fastq.gz,,, -NAIVE_INPUT,3,BLA203A49_S1_L006_R1_001.fastq.gz,,, diff --git a/assets/email_template.html b/assets/email_template.html index a3f19225f..5a4548bb0 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,6 +1,5 @@ - diff --git a/assets/email_template.txt b/assets/email_template.txt index 6d35a6974..e1b785260 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -6,7 +6,6 @@ `._,._,' nf-core/chipseq v${version} ---------------------------------------------------- - Run Name: $runName <% if (success){ diff --git a/assets/multiqc/deseq2_clustering_header.txt b/assets/multiqc/deseq2_clustering_header.txt index a5edcb79d..f7bb33d8f 100644 --- a/assets/multiqc/deseq2_clustering_header.txt +++ 
b/assets/multiqc/deseq2_clustering_header.txt @@ -1,9 +1,9 @@ #id: 'deseq2_clustering' #section_name: 'MERGED LIB: DESeq2 sample similarity' -#description: " matrix is generated from clustering by Euclidean distances between -# DESeq2 +#description: "Matrix is generated from clustering with Euclidean distances between +# DESeq2 # rlog values for each sample -# (see featurecounts_deseq2.r script)." +# in the deseq2_qc.r script." #plot_type: 'heatmap' #anchor: 'deseq2_clustering' #pconfig: diff --git a/assets/multiqc/deseq2_pca_header.txt b/assets/multiqc/deseq2_pca_header.txt index a086d306c..250c1cb77 100644 --- a/assets/multiqc/deseq2_pca_header.txt +++ b/assets/multiqc/deseq2_pca_header.txt @@ -1,8 +1,8 @@ #id: 'deseq2_pca' #section_name: 'MERGED LIB: DESeq2 PCA plot' -#description: "between samples in the experiment. +#description: "PCA plot of the samples in the experiment. # These values are calculated using DESeq2 -# in the featurecounts_deseq2.r script." +# in the deseq2_qc.r script." #plot_type: 'scatter' #anchor: 'deseq2_pca' #pconfig: diff --git a/assets/multiqc_config.yaml b/assets/multiqc_config.yaml deleted file mode 100644 index 6d5da1d66..000000000 --- a/assets/multiqc_config.yaml +++ /dev/null @@ -1,164 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/chipseq - analysis pipeline. For information about how to interpret these results, please see the - documentation. - -data_format: 'yaml' - -run_modules: - - custom_content - - fastqc - - cutadapt - - samtools - - picard - - preseq - - featureCounts - - deeptools - - phantompeakqualtools - -exclude_modules: - - 'general_stats' - -module_order: - - fastqc: - name: 'LIB: FastQC (raw)' - info: 'This section of the report shows FastQC results before adapter trimming for individual libraries.' 
- path_filters: - - './fastqc/*.zip' - - cutadapt: - name: 'LIB: cutadapt (trimmed)' - info: 'This section of the report shows the length of trimmed reads by cutadapt for individual libraries.' - - fastqc: - name: 'LIB: FastQC (trimmed)' - info: 'This section of the report shows FastQC results after adapter trimming for individual libraries.' - path_filters: - - './trimgalore/fastqc/*.zip' - - samtools: - name: 'LIB: SAMTools' - info: 'This section of the report shows SAMTools results for individual libraries.' - path_filters: - - './alignment/library/*' - - samtools: - name: 'MERGED LIB: SAMTools (unfiltered)' - info: 'This section of the report shows SAMTools results after merging libraries and before filtering.' - path_filters: - - './alignment/mergedLibrary/*.mLb.mkD.sorted.bam*' - - preseq: - name: 'MERGED LIB: Preseq (unfiltered)' - info: 'This section of the report shows Preseq results after merging libraries and before filtering.' - - samtools: - name: 'MERGED LIB: SAMTools (filtered)' - info: 'This section of the report shows SAMTools results after merging libraries and after filtering.' - path_filters: - - './alignment/mergedLibrary/*.mLb.clN.sorted.bam*' - - picard: - name: 'MERGED LIB: Picard' - info: 'This section of the report shows picard results after merging libraries and after filtering.' - path_filters: - - './alignment/mergedLibrary/picard_metrics/*' - - deeptools: - name: 'MERGED LIB: deepTools' - anchor: 'mlib_deeptools' - info: 'This section of the report shows ChIP-seq QC plots generated by deepTools.' - - featureCounts: - name: 'MERGED LIB: featureCounts' - anchor: 'mlib_featurecounts' - info: 'This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks.' 
- path_filters: - - './macs/consensus/*.summary' - -report_section_order: - peak_count: - before: mlib_deeptools - frip_score: - before: peak_count - peak_annotation: - before: frip_score - strand_shift_correlation: - before: peak_annotation - nsc_coefficient: - before: strand_shift_correlation - rsc_coefficient: - before: nsc_coefficient - mlib_featurecounts: - before: rsc_coefficient - deseq2_pca_1: - order: -1600 - deseq2_pca_2: - order: -1700 - deseq2_pca_3: - order: -1800 - deseq2_pca_4: - order: -1900 - deseq2_pca_5: - order: -2000 - deseq2_pca_6: - order: -2100 - deseq2_pca_7: - order: -2200 - deseq2_pca_8: - order: -2300 - deseq2_pca_9: - order: -2400 - deseq2_pca_10: - order: -2500 - deseq2_clustering_1: - order: -2600 - deseq2_clustering_2: - order: -2700 - deseq2_clustering_3: - order: -2800 - deseq2_clustering_4: - order: -2900 - deseq2_clustering_5: - order: -3000 - deseq2_clustering_6: - order: -3100 - deseq2_clustering_7: - order: -3200 - deseq2_clustering_8: - order: -3300 - deseq2_clustering_9: - order: -3400 - deseq2_clustering_10: - order: -3500 - software_versions: - order: -3600 - nf-core-chipseq-summary: - order: -3700 - -custom_plot_config: - picard_insert_size: - cpswitch_c_active: False - smooth_points: 1000 - featurecounts: - cpswitch_c_active: False - -extra_fn_clean_exts: - - 'fastq.gz' - - '_trimmed' - - '_val' - - 'sorted.bam' - - '.Lb' - - 'mkD' - - 'clN' - - 'mLb' - - '_peaks' - - '_spp' - - '.spp' - - 'ccurve' - -# # Customise the module search patterns to speed up execution time -# # - Skip module sub-tools that we are not interested in -# # - Replace file-content searching with filename pattern searching -# # - Don't add anything that is the same as the MultiQC default -# # See https://multiqc.info/docs/#optimise-file-search-patterns for details -sp: - cutadapt: - fn: '*trimming_report.txt' - preseq: - fn: '*.ccurve.txt' - deeptools/plotFingerprintOutRawCounts: - fn: '*plotFingerprint*' - deeptools/plotProfile: - fn: 
'*plotProfile*' diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml new file mode 100644 index 000000000..4493905e5 --- /dev/null +++ b/assets/multiqc_config.yml @@ -0,0 +1,175 @@ +report_comment: > + This report has been generated by the nf-core/chipseq + analysis pipeline. For information about how to interpret these results, please see the + documentation. + +data_format: "yaml" + +export_plots: true + +run_modules: + - custom_content + - fastqc + - cutadapt + - samtools + - picard + - preseq + - featureCounts + - deeptools + - phantompeakqualtools + +exclude_modules: + - "general_stats" + +module_order: + - fastqc: + name: "LIB: FastQC (raw)" + info: "This section of the report shows FastQC results before adapter trimming for individual libraries." + path_filters: + - "./fastqc/*.zip" + - cutadapt: + name: "LIB: cutadapt (trimmed)" + info: "This section of the report shows the length of trimmed reads by cutadapt for individual libraries." + - fastqc: + name: "LIB: FastQC (trimmed)" + info: "This section of the report shows FastQC results after adapter trimming for individual libraries." + path_filters: + - "./trimgalore/fastqc/*.zip" + - samtools: + name: "LIB: SAMTools" + info: "This section of the report shows SAMTools results for individual libraries." + path_filters: + - "./alignment/library/*" + - samtools: + name: "MERGED LIB: SAMTools (unfiltered)" + info: "This section of the report shows SAMTools results after merging libraries and before filtering." + path_filters: + - "./alignment/mergedLibrary/unfiltered/*.mLb.mkD.sorted.bam*" + - picard: + name: "MERGED LIB: Picard (unfiltered)" + info: "This section of the report shows picard results after merging libraries and before filtering." + path_filters: + - "./alignment/mergedLibrary/unfiltered/picard_metrics/*" + - preseq: + name: "MERGED LIB: Preseq (unfiltered)" + info: "This section of the report shows Preseq results after merging libraries and before filtering." 
+ - samtools: + name: "MERGED LIB: SAMTools (filtered)" + info: "This section of the report shows SAMTools results after merging libraries and after filtering." + path_filters: + - "./alignment/mergedLibrary/filtered/*.mLb.clN.sorted.bam*" + - picard: + name: "MERGED LIB: Picard (filtered)" + info: "This section of the report shows picard results after merging libraries and after filtering." + path_filters: + - "./alignment/mergedLibrary/filtered/picard_metrics/*" + - deeptools: + name: "MERGED LIB: deepTools" + anchor: "mlib_deeptools" + info: "This section of the report shows ChIP-seq QC plots generated by deepTools." + - featureCounts: + name: "MERGED LIB: featureCounts" + anchor: "mlib_featurecounts" + info: "This section of the report shows featureCounts results for the number of reads assigned to merged library consensus peaks." + path_filters: + - "./macs2/featurecounts/*.summary" + +report_section_order: + peak_count: + before: mlib_deeptools + frip_score: + before: peak_count + peak_annotation: + before: frip_score + strand_shift_correlation: + before: peak_annotation + nsc_coefficient: + before: strand_shift_correlation + rsc_coefficient: + before: nsc_coefficient + mlib_featurecounts: + before: rsc_coefficient + deseq2_pca_1: + order: -1600 + deseq2_pca_2: + order: -1700 + deseq2_pca_3: + order: -1800 + deseq2_pca_4: + order: -1900 + deseq2_pca_5: + order: -2000 + deseq2_pca_6: + order: -2100 + deseq2_pca_7: + order: -2200 + deseq2_pca_8: + order: -2300 + deseq2_pca_9: + order: -2400 + deseq2_pca_10: + order: -2500 + deseq2_clustering_1: + order: -2600 + deseq2_clustering_2: + order: -2700 + deseq2_clustering_3: + order: -2800 + deseq2_clustering_4: + order: -2900 + deseq2_clustering_5: + order: -3000 + deseq2_clustering_6: + order: -3100 + deseq2_clustering_7: + order: -3200 + deseq2_clustering_8: + order: -3300 + deseq2_clustering_9: + order: -3400 + deseq2_clustering_10: + order: -3500 + software_versions: + order: -3600 + nf-core-chipseq-summary: + 
order: -3700 + +custom_plot_config: + picard_insert_size: + cpswitch_c_active: False + smooth_points: 1000 + featurecounts: + cpswitch_c_active: False + +extra_fn_clean_exts: + - "fastq.gz" + - "_trimmed" + - "_val" + - "sorted.bam" + - ".Lb" + - "mkD" + - "clN" + - "mLb" + - "_peaks" + - ".FRiP" + - ".peak" + - "_spp" + - ".spp" + - "lc_extrap" + +# # Customise the module search patterns to speed up execution time +# # - Skip module sub-tools that we are not interested in +# # - Replace file-content searching with filename pattern searching +# # - Don't add anything that is the same as the MultiQC default +# # See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + cutadapt: + fn: "*trimming_report.txt" + preseq: + fn: "*.lc_extrap.txt" + deeptools/plotFingerprintOutRawCounts: + fn: "*plotFingerprint*" + deeptools/plotProfile: + fn: "*plotProfile*" + phantompeakqualtools/out: + fn: "*.spp.out" diff --git a/assets/nf-core-chipseq_logo.png b/assets/nf-core-chipseq_logo.png deleted file mode 100644 index 7e95496be..000000000 Binary files a/assets/nf-core-chipseq_logo.png and /dev/null differ diff --git a/assets/nf-core-chipseq_logo_light.png b/assets/nf-core-chipseq_logo_light.png new file mode 100644 index 000000000..21300d1d3 Binary files /dev/null and b/assets/nf-core-chipseq_logo_light.png differ diff --git a/assets/nf-core-chipseq_social_preview.png b/assets/nf-core-chipseq_social_preview.png deleted file mode 100644 index 57778defb..000000000 Binary files a/assets/nf-core-chipseq_social_preview.png and /dev/null differ diff --git a/assets/nf-core-chipseq_social_preview.svg b/assets/nf-core-chipseq_social_preview.svg deleted file mode 100644 index 078dfb8c9..000000000 --- a/assets/nf-core-chipseq_social_preview.svg +++ /dev/null @@ -1,446 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 
image/svg+xml - - - - - - - ChIP-seq peak-calling, QC and differential analysis pipeline - chipseq - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/assets/samplesheet_pe.csv b/assets/samplesheet_pe.csv new file mode 100644 index 000000000..3a304f121 --- /dev/null +++ b/assets/samplesheet_pe.csv @@ -0,0 +1,21 @@ +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,BLA203A1_S27_L006_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,BLA203A25_S16_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,BLA203A25_S16_L002_R2_001.fastq.gz,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,BLA203A49_S40_L001_R2_001.fastq.gz,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,BLA203A7_S60_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,BLA203A43_S34_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,BLA203A43_S34_L002_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,BLA203A64_S55_L001_R2_001.fastq.gz,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,BLA203A3_S29_L006_R2_001.fastq.gz,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,BLA203A27_S18_L001_R2_001.fastq.gz,TCF4,WT_INPUT +WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,BLA203A51_S42_L001_R2_001.fastq.gz,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,BLA203A9_S62_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,BLA203A45_S36_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,BLA203A66_S57_L001_R2_001.fastq.gz,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,BLA203A6_S32_L006_R2_001.fastq.gz,,
+WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,BLA203A30_S21_L001_R2_001.fastq.gz,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,BLA203A31_S21_L003_R2_001.fastq.gz,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,BLA203A12_S3_L001_R2_001.fastq.gz,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,BLA203A48_S39_L001_R2_001.fastq.gz,, +NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,BLA203A49_S1_L006_R2_001.fastq.gz,, diff --git a/assets/samplesheet_se.csv b/assets/samplesheet_se.csv new file mode 100644 index 000000000..a9581d6e0 --- /dev/null +++ b/assets/samplesheet_se.csv @@ -0,0 +1,21 @@ +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, 
+NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, diff --git a/assets/schema_input.json b/assets/schema_input.json new file mode 100644 index 000000000..cda13e0b8 --- /dev/null +++ b/assets/schema_input.json @@ -0,0 +1,46 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema", + "$id": "https://raw.githubusercontent.com/nf-core/chipseq/master/assets/schema_input.json", + "title": "nf-core/chipseq pipeline - params.input schema", + "description": "Schema for the file provided with params.input", + "type": "array", + "items": { + "type": "object", + "properties": { + "sample": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Sample name must be provided and cannot contain spaces" + }, + "fastq_1": { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$", + "errorMessage": "FastQ file for reads 1 must be provided, cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" + }, + "fastq_2": { + "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'", + "anyOf": [ + { + "type": "string", + "pattern": "^\\S+\\.f(ast)?q\\.gz$" + }, + { + "type": "string", + "maxLength": 0 + } + ] + }, + "antibody": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Antibody entry cannot contain spaces" + }, + "control": { + "type": "string", + "pattern": "^\\S+$", + "errorMessage": "Control entry cannot contain spaces" + } + }, + "required": ["sample", "fastq_1"] + } +} diff --git a/assets/sendmail_template.txt b/assets/sendmail_template.txt index 0042bf1d4..08d8114b4 100644 --- a/assets/sendmail_template.txt +++ b/assets/sendmail_template.txt @@ -12,18 +12,18 @@ $email_html Content-Type: image/png;name="nf-core-chipseq_logo.png" Content-Transfer-Encoding: base64 Content-ID: -Content-Disposition: inline; filename="nf-core-chipseq_logo.png" +Content-Disposition: inline; filename="nf-core-chipseq_logo_light.png" -<% out << new File("$baseDir/assets/nf-core-chipseq_logo.png"). - bytes. 
- encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' ) %> +<% out << new File("$projectDir/assets/nf-core-chipseq_logo_light.png"). + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' ) %> <% if (mqcFile){ @@ -37,15 +37,15 @@ Content-ID: Content-Disposition: attachment; filename=\"${mqcFileObj.getName()}\" ${mqcFileObj. - bytes. - encodeBase64(). - toString(). - tokenize( '\n' )*. - toList()*. - collate( 76 )*. - collect { it.join() }. - flatten(). - join( '\n' )} + bytes. + encodeBase64(). + toString(). + tokenize( '\n' )*. + toList()*. + collate( 76 )*. + collect { it.join() }. + flatten(). + join( '\n' )} """ }} %> diff --git a/bin/bampe_rm_orphan.py b/bin/bampe_rm_orphan.py index 5b0a6f729..4ab9935b8 100755 --- a/bin/bampe_rm_orphan.py +++ b/bin/bampe_rm_orphan.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 ############################################################################### ############################################################################### @@ -17,17 +17,25 @@ ############################################ ############################################ -Description = 'Remove singleton reads from paired-end BAM file i.e if read1 is present in BAM file without read 2 and vice versa.' +Description = ( + "Remove singleton reads from paired-end BAM file i.e if read1 is present in BAM file without read 2 and vice versa." 
+) Epilog = """Example usage: bampe_rm_orphan.py """ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument('BAM_INPUT_FILE', help="Input BAM file sorted by name.") -argParser.add_argument('BAM_OUTPUT_FILE', help="Output BAM file sorted by name.") +argParser.add_argument("BAM_INPUT_FILE", help="Input BAM file sorted by name.") +argParser.add_argument("BAM_OUTPUT_FILE", help="Output BAM file sorted by name.") ## OPTIONAL PARAMETERS -argParser.add_argument('-fr', '--only_fr_pairs', dest="ONLY_FR_PAIRS", help="Only keeps pairs that are in FR orientation on same chromosome.",action='store_true') +argParser.add_argument( + "-fr", + "--only_fr_pairs", + dest="ONLY_FR_PAIRS", + help="Only keeps pairs that are in FR orientation on same chromosome.", + action="store_true", +) args = argParser.parse_args() ############################################ @@ -36,6 +44,7 @@ ############################################ ############################################ + def makedir(path): if not len(path) == 0: @@ -45,20 +54,25 @@ def makedir(path): if exception.errno != errno.EEXIST: raise + ############################################ ############################################ ## MAIN FUNCTION ############################################ ############################################ -def bampe_rm_orphan(BAMIn,BAMOut,onlyFRPairs=False): + +def bampe_rm_orphan(BAMIn, BAMOut, onlyFRPairs=False): ## SETUP DIRECTORY/FILE STRUCTURE OutDir = os.path.dirname(BAMOut) makedir(OutDir) ## COUNT VARIABLES - totalReads = 0; totalOutputPairs = 0; totalSingletons = 0; totalImproperPairs = 0 + totalReads = 0 + totalOutputPairs = 0 + totalSingletons = 0 + totalImproperPairs = 0 ## ITERATE THROUGH BAM FILE EOF = 0 @@ -69,7 +83,8 @@ def bampe_rm_orphan(BAMIn,BAMOut,onlyFRPairs=False): for read in iter: totalReads += 1 if currRead.qname == read.qname: - pair1 = currRead; pair2 = read + pair1 = currRead + pair2 = read ## FILTER FOR READS 
ON SAME CHROMOSOME IN FR ORIENTATION if onlyFRPairs: @@ -125,30 +140,31 @@ def bampe_rm_orphan(BAMIn,BAMOut,onlyFRPairs=False): SAMFin.close() SAMFout.close() - LogFile = os.path.join(OutDir,'%s_bampe_rm_orphan.log' % (os.path.basename(BAMOut[:-4]))) - SamLogFile = open(LogFile,'w') - SamLogFile.write('\n##############################\n') - SamLogFile.write('FILES/DIRECTORIES') - SamLogFile.write('\n##############################\n\n') - SamLogFile.write('Input File: ' + BAMIn + '\n') - SamLogFile.write('Output File: ' + BAMOut + '\n') - SamLogFile.write('\n##############################\n') - SamLogFile.write('OVERALL COUNTS') - SamLogFile.write('\n##############################\n\n') - SamLogFile.write('Total Input Reads = ' + str(totalReads) + '\n') - SamLogFile.write('Total Output Pairs = ' + str(totalOutputPairs) + '\n') - SamLogFile.write('Total Singletons Excluded = ' + str(totalSingletons) + '\n') - SamLogFile.write('Total Improper Pairs Excluded = ' + str(totalImproperPairs) + '\n') - SamLogFile.write('\n##############################\n') + LogFile = os.path.join(OutDir, "%s_bampe_rm_orphan.log" % (os.path.basename(BAMOut[:-4]))) + SamLogFile = open(LogFile, "w") + SamLogFile.write("\n##############################\n") + SamLogFile.write("FILES/DIRECTORIES") + SamLogFile.write("\n##############################\n\n") + SamLogFile.write("Input File: " + BAMIn + "\n") + SamLogFile.write("Output File: " + BAMOut + "\n") + SamLogFile.write("\n##############################\n") + SamLogFile.write("OVERALL COUNTS") + SamLogFile.write("\n##############################\n\n") + SamLogFile.write("Total Input Reads = " + str(totalReads) + "\n") + SamLogFile.write("Total Output Pairs = " + str(totalOutputPairs) + "\n") + SamLogFile.write("Total Singletons Excluded = " + str(totalSingletons) + "\n") + SamLogFile.write("Total Improper Pairs Excluded = " + str(totalImproperPairs) + "\n") + SamLogFile.write("\n##############################\n") SamLogFile.close() + 
############################################ ############################################ ## RUN FUNCTION ############################################ ############################################ -bampe_rm_orphan(BAMIn=args.BAM_INPUT_FILE,BAMOut=args.BAM_OUTPUT_FILE,onlyFRPairs=args.ONLY_FR_PAIRS) +bampe_rm_orphan(BAMIn=args.BAM_INPUT_FILE, BAMOut=args.BAM_OUTPUT_FILE, onlyFRPairs=args.ONLY_FR_PAIRS) ############################################ ############################################ diff --git a/bin/check_design.py b/bin/check_design.py deleted file mode 100755 index 51a993753..000000000 --- a/bin/check_design.py +++ /dev/null @@ -1,191 +0,0 @@ -#!/usr/bin/env python - -####################################################################### -####################################################################### -## Created on April 4th 2019 to check nf-core/chipseq design file -####################################################################### -####################################################################### - -import os -import sys -import argparse - -############################################ -############################################ -## PARSE ARGUMENTS -############################################ -############################################ - -Description = 'Reformat nf-core/chipseq design file and check its contents.' 
-Epilog = """Example usage: python check_design.py """ - -argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) - -## REQUIRED PARAMETERS -argParser.add_argument('DESIGN_FILE', help="Input design file.") -argParser.add_argument('READ_MAPPING_FILE', help="Output design file containing sample ids and reads.") -argParser.add_argument('CONTROL_MAPPING_FILE', help="Output design file containing ip vs control mappings.") -args = argParser.parse_args() - -############################################ -############################################ -## MAIN FUNCTION -############################################ -############################################ - -def reformat_design(DesignFile,ReadMappingFile,ControlMappingFile): - - ERROR_STR = 'ERROR: Please check design file' - HEADER = ['group', 'replicate', 'fastq_1', 'fastq_2', 'antibody', 'control'] - - ## CHECK HEADER - fin = open(DesignFile,'r') - header = fin.readline().strip().split(',') - if header != HEADER: - print("{} header: {} != {}".format(ERROR_STR,','.join(header),','.join(HEADER))) - sys.exit(1) - - numColList = [] - sampleMappingDict = {} - antibodyDict = {} - while True: - line = fin.readline() - if line: - lspl = [x.strip() for x in line.strip().split(',')] - group,replicate,fastQFiles,antibody,control = lspl[0],lspl[1],[x for x in lspl[2:-2] if x],lspl[-2],lspl[-1] - - ## CHECK VALID NUMBER OF COLUMNS PER SAMPLE - numCols = len(lspl) - if numCols not in [6]: - print("{}: Invalid number of columns (should be 6)!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - numColList.append(numCols) - - ## CHECK GROUP ID DOESNT CONTAIN SPACES - if group.find(' ') != -1: - print("{}: Group id contains spaces!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - - ## CHECK REPLICATE COLUMN IS INTEGER - if not replicate.isdigit(): - print("{}: Replicate id not an integer!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - replicate = int(replicate) - - for fastq in 
fastQFiles: - ## CHECK FASTQ FILE EXTENSION - if fastq[-9:] != '.fastq.gz' and fastq[-6:] != '.fq.gz': - print("{}: FastQ file has incorrect extension (has to be '.fastq.gz' or 'fq.gz') - {}\nLine: '{}'".format(ERROR_STR,fastq,line.strip())) - sys.exit(1) - - ## CREATE GROUP MAPPING DICT = {GROUP_ID: {REPLICATE_ID:[[FASTQ_FILES]]} - if group not in sampleMappingDict: - sampleMappingDict[group] = {} - if replicate not in sampleMappingDict[group]: - sampleMappingDict[group][replicate] = [] - sampleMappingDict[group][replicate].append(fastQFiles) - - ## CHECK BOTH ANTIBODY AND CONTROL COLUMNS HAVE VALID VALUES - if antibody: - if antibody.find(' ') != -1: - print("{}: Antibody id contains spaces!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - if not control: - print("{}: both Antibody and Control must be specified!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - if control: - if control.find(' ') != -1: - print("{}: Control id contains spaces!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - if not antibody: - print("{}: both Antibody and Control must be specified!\nLine: '{}'".format(ERROR_STR,line.strip())) - sys.exit(1) - - ## CREATE ANTIBODY MAPPING CONTROL DICT - if antibody and control: - antibodyDict[group] = (antibody,control) - - else: - fin.close() - break - - ## CHECK IF DATA IS PAIRED-END OR SINGLE-END AND NOT A MIXTURE - if min(numColList) != max(numColList): - print("{}: Mixture of paired-end and single-end reads!".format(ERROR_STR)) - sys.exit(1) - - ## CHECK IF ANTIBODY AND CONTROL COLUMNS HAVE BEEN SPECIFIED AT LEAST ONCE - if len(antibodyDict) == 0: - print("{}: Antibody and Control must be specified at least once!".format(ERROR_STR)) - sys.exit(1) - - ## WRITE READ MAPPING FILE - antibodyGroupDict = {} - fout = open(ReadMappingFile,'w') - fout.write(','.join(['sample_id','fastq_1','fastq_2']) + '\n') - for group in sorted(sampleMappingDict.keys()): - - ## CHECK THAT REPLICATE IDS ARE IN FORMAT 1.. 
- uniq_rep_ids = set(sampleMappingDict[group].keys()) - if len(uniq_rep_ids) != max(uniq_rep_ids): - print("{}: Replicate IDs must start with 1..\nGroup: {}, Replicate IDs: {}".format(ERROR_STR,group,list(uniq_rep_ids))) - sys.exit(1) - - ## RECONSTRUCT LINE FOR SAMPLE IN DESIGN - for replicate in sorted(sampleMappingDict[group].keys()): - for idx in range(len(sampleMappingDict[group][replicate])): - fastQFiles = sampleMappingDict[group][replicate][idx] - - ## GET SAMPLE_ID,FASTQ_1,FASTQ_2 COLUMNS - sample_id = "{}_R{}_T{}".format(group,replicate,idx+1) - oList = [sample_id] + fastQFiles - if len(fastQFiles) == 1: - oList += [''] - fout.write(','.join(oList) + '\n') - - ## EXTRAPOLATE CONTROL COLUMN - if group in antibodyDict: - antibody,control = antibodyDict[group] - if control in sampleMappingDict.keys(): - control_id = "{}_R1".format(control) - if replicate in sampleMappingDict[control]: - control_id = "{}_R{}".format(control,replicate) - if antibody not in antibodyGroupDict: - antibodyGroupDict[antibody] = {} - if group not in antibodyGroupDict[antibody]: - antibodyGroupDict[antibody][group] = [] - antibodyList = [sample_id[:-3],control_id] - if not antibodyList in antibodyGroupDict[antibody][group]: - antibodyGroupDict[antibody][group].append(antibodyList) - else: - print("{}: Control id not a valid group\nControl id: {}, Valid Groups: {}".format(ERROR_STR,control,sorted(sampleMappingDict.keys()))) - sys.exit(1) - fout.close() - - ## WRITE SAMPLE TO CONTROL MAPPING FILE - fout = open(ControlMappingFile,'w') - fout.write(','.join(['sample_id','control_id','antibody','replicatesExist','multipleGroups']) + '\n') - for antibody in sorted(antibodyGroupDict.keys()): - repsExist = '0' - if max([len(x) for x in antibodyGroupDict[antibody].values()]) > 1: - repsExist = '1' - multipleGroups = '0' - if len(antibodyGroupDict[antibody].keys()) > 1: - multipleGroups = '1' - for group in sorted(antibodyGroupDict[antibody].keys()): - for antibodyList in 
antibodyGroupDict[antibody][group]: - fout.write(','.join(antibodyList+[antibody,repsExist,multipleGroups]) + '\n') - fout.close() - -############################################ -############################################ -## RUN FUNCTION -############################################ -############################################ - -reformat_design(DesignFile=args.DESIGN_FILE,ReadMappingFile=args.READ_MAPPING_FILE,ControlMappingFile=args.CONTROL_MAPPING_FILE) - -############################################ -############################################ -############################################ -############################################ diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py new file mode 100755 index 000000000..eaf0d24c8 --- /dev/null +++ b/bin/check_samplesheet.py @@ -0,0 +1,188 @@ +#!/usr/bin/env python3 + +import os +import sys +import errno +import argparse + + +def parse_args(args=None): + Description = "Reformat nf-core/chipseq samplesheet file and check its contents." 
+ Epilog = "Example usage: python check_samplesheet.py " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input samplesheet file.") + parser.add_argument("FILE_OUT", help="Output file.") + return parser.parse_args(args) + + +def make_dir(path): + if len(path) > 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise exception + + +def print_error(error, context="Line", context_str=""): + error_str = "ERROR: Please check samplesheet -> {}".format(error) + if context != "" and context_str != "": + error_str = "ERROR: Please check samplesheet -> {}\n{}: '{}'".format( + error, context.strip(), context_str.strip() + ) + print(error_str) + sys.exit(1) + + +def check_samplesheet(file_in, file_out): + """ + This function checks that the samplesheet follows the following structure: + sample,fastq_1,fastq_2,antibody,control + SPT5_T0_REP1,SRR1822153_1.fastq.gz,SRR1822153_2.fastq.gz,SPT5,SPT5_INPUT_REP1 + SPT5_T0_REP2,SRR1822154_1.fastq.gz,SRR1822154_2.fastq.gz,SPT5,SPT5_INPUT_REP2 + SPT5_INPUT_REP1,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204809_Spt5-ChIP_Input1_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, + SPT5_INPUT_REP2,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R1.fastq.gz,SRR5204810_Spt5-ChIP_Input2_SacCer_ChIP-Seq_ss100k_R2.fastq.gz,, + For an example see: + https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv + """ + + sample_mapping_dict = {} + with open(file_in, "r", encoding="utf-8-sig") as fin: + + ## Check header + MIN_COLS = 2 + HEADER = ["sample", "fastq_1", "fastq_2", "antibody", "control"] + header = [x.strip('"') for x in fin.readline().strip().split(",")] + if header[: len(HEADER)] != HEADER: + print(f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}") + sys.exit(1) + + ## Check sample entries + for line in fin: + lspl = 
[x.strip().strip('"') for x in line.strip().split(",")] + + # Check valid number of columns per row + if len(lspl) < len(HEADER): + print_error( + "Invalid number of columns (minimum = {})!".format(len(HEADER)), + "Line", + line, + ) + num_cols = len([x for x in lspl if x]) + if num_cols < MIN_COLS: + print_error( + "Invalid number of populated columns (minimum = {})!".format(MIN_COLS), + "Line", + line, + ) + + ## Check sample name entries + sample, fastq_1, fastq_2, antibody, control = lspl[: len(HEADER)] + if sample.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}") + sample = sample.replace(" ", "_") + if not sample: + print_error("Sample entry has not been specified!", "Line", line) + + ## Check FastQ file extension + for fastq in [fastq_1, fastq_2]: + if fastq: + if fastq.find(" ") != -1: + print_error("FastQ file contains spaces!", "Line", line) + if not fastq.endswith(".fastq.gz") and not fastq.endswith(".fq.gz"): + print_error( + "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!", + "Line", + line, + ) + + ## Check antibody and control columns have valid values + if antibody: + if antibody.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for antibody: {antibody}") + antibody = antibody.replace(" ", "_") + if not control: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + if control: + if control.find(" ") != -1: + print(f"WARNING: Spaces have been replaced by underscores for control: {control}") + control = control.replace(" ", "_") + if not antibody: + print_error( + "Both antibody and control columns must be specified!", + "Line", + line, + ) + + ## Auto-detect paired-end/single-end + sample_info = [] ## [single_end, fastq_1, fastq_2, antibody, control] + if sample and fastq_1 and fastq_2: ## Paired-end short reads + sample_info = ["0", fastq_1, fastq_2, antibody, control] + elif sample and fastq_1 and not fastq_2: ## 
Single-end short reads + sample_info = ["1", fastq_1, fastq_2, antibody, control] + else: + print_error("Invalid combination of columns provided!", "Line", line) + + ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, antibody, control ]]} + if sample not in sample_mapping_dict: + sample_mapping_dict[sample] = [sample_info] + else: + if sample_info in sample_mapping_dict[sample]: + print_error("Samplesheet contains duplicate rows!", "Line", line) + else: + sample_mapping_dict[sample].append(sample_info) + + ## Write validated samplesheet with appropriate columns + if len(sample_mapping_dict) > 0: + out_dir = os.path.dirname(file_out) + make_dir(out_dir) + with open(file_out, "w") as fout: + fout.write( + ",".join( + [ + "sample", + "single_end", + "fastq_1", + "fastq_2", + "antibody", + "control", + ] + ) + + "\n" + ) + for sample in sorted(sample_mapping_dict.keys()): + + ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end + if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]): + print_error( + f"Multiple runs of a sample must be of the same datatype i.e. 
single-end or paired-end!", + "Sample", + sample, + ) + + for idx, val in enumerate(sample_mapping_dict[sample]): + control = val[-1] + if control and control not in sample_mapping_dict.keys(): + print_error( + f"Control identifier has to match does a provided sample identifier!", + "Control", + control, + ) + + fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n") + else: + print_error(f"No entries to process!", "Samplesheet: {file_in}") + + +def main(args=None): + args = parse_args(args) + check_samplesheet(args.FILE_IN, args.FILE_OUT) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/deseq2_qc.r b/bin/deseq2_qc.r new file mode 100755 index 000000000..e8c2617f5 --- /dev/null +++ b/bin/deseq2_qc.r @@ -0,0 +1,247 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## REQUIREMENTS ## +################################################ +################################################ + +## PCA, HEATMAP AND SCATTERPLOTS FOR SAMPLES IN COUNTS FILE +## - SAMPLE NAMES HAVE TO END IN e.g. "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. 
+## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(DESeq2) +library(ggplot2) +library(RColorBrewer) +library(pheatmap) + +################################################ +################################################ +## PARSE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list( + make_option(c("-i", "--count_file" ), type="character", default=NULL , metavar="path" , help="Count file matrix where rows are genes and columns are samples." ), + make_option(c("-f", "--count_col" ), type="integer" , default=2 , metavar="integer", help="First column containing sample count data." ), + make_option(c("-d", "--id_col" ), type="integer" , default=1 , metavar="integer", help="Column containing identifiers to be used." ), + make_option(c("-r", "--sample_suffix" ), type="character", default='' , metavar="string" , help="Suffix to remove after sample name in columns e.g. '.rmDup.bam' if 'DRUG_R1.rmDup.bam'."), + make_option(c("-o", "--outdir" ), type="character", default='./' , metavar="path" , help="Output directory." ), + make_option(c("-p", "--outprefix" ), type="character", default='deseq2', metavar="string" , help="Output prefix." ), + make_option(c("-v", "--vst" ), type="logical" , default=FALSE , metavar="boolean", help="Run vst transform instead of rlog." ), + make_option(c("-c", "--cores" ), type="integer" , default=1 , metavar="integer", help="Number of cores." 
) +) + +opt_parser <- OptionParser(option_list=option_list) +opt <- parse_args(opt_parser) + +if (is.null(opt$count_file)){ + print_help(opt_parser) + stop("Please provide a counts file.", call.=FALSE) +} + +################################################ +################################################ +## READ IN COUNTS FILE ## +################################################ +################################################ + +count.table <- read.delim(file=opt$count_file,header=TRUE, row.names=NULL, skip=1, check.names=FALSE) +rownames(count.table) <- count.table[,opt$id_col] +count.table <- count.table[,opt$count_col:ncol(count.table),drop=FALSE] +colnames(count.table) <- gsub(opt$sample_suffix,"",colnames(count.table)) +colnames(count.table) <- gsub(pattern='\\.$', replacement='', colnames(count.table)) + +################################################ +################################################ +## RUN DESEQ2 ## +################################################ +################################################ + +if (file.exists(opt$outdir) == FALSE) { + dir.create(opt$outdir, recursive=TRUE) +} +setwd(opt$outdir) + +samples.vec <- colnames(count.table) +name_components <- strsplit(samples.vec, "_") +n_components <- length(name_components[[1]]) +decompose <- n_components!=1 && all(sapply(name_components, length)==n_components) +coldata <- data.frame(samples.vec, sample=samples.vec, row.names=1) +if (decompose) { + groupings <- as.data.frame(lapply(1:n_components, function(i) sapply(name_components, "[[", i))) + names(groupings) <- paste0("Group", 1:n_components) + n_distinct <- sapply(groupings, function(grp) length(unique(grp))) + groupings <- groupings[n_distinct!=1 & n_distinct!=length(samples.vec)] + if (ncol(groupings)!=0) { + coldata <- cbind(coldata, groupings) + } else { + decompose <- FALSE + } +} + +DDSFile <- paste(opt$outprefix,".dds.RData",sep="") + +counts <- count.table[,samples.vec,drop=FALSE] +dds <- 
DESeqDataSetFromMatrix(countData=round(counts), colData=coldata, design=~ 1) +dds <- estimateSizeFactors(dds) +if (min(dim(count.table))<=1) { # No point if only one sample, or one gene + save(dds,file=DDSFile) + saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile)) + warning("Not enough samples or genes in counts file for PCA.", call.=FALSE) + quit(save = "no", status = 0, runLast = FALSE) +} +if (!opt$vst) { + vst_name <- "rlog" + rld <- rlog(dds) +} else { + vst_name <- "vst" + rld <- varianceStabilizingTransformation(dds) +} + +assay(dds, vst_name) <- assay(rld) +save(dds,file=DDSFile) +saveRDS(dds, file=sub("\\.dds\\.RData$", ".rds", DDSFile)) + +################################################ +################################################ +## PLOT QC ## +################################################ +################################################ + +##' PCA pre-processor +##' +##' Generate all the necessary information to plot PCA from a DESeq2 object +##' in which an assay containing a variance-stabilised matrix of counts is +##' stored. Copied from DESeq2::plotPCA, but with additional ability to +##' say which assay to run the PCA on. +##' +##' @param object The DESeqDataSet object. +##' @param ntop number of top genes to use for principal components, selected by highest row variance. +##' @param assay the name or index of the assay that stores the variance-stabilised data. +##' @return A data.frame containing the projected data alongside the grouping columns. +##' A 'percentVar' attribute is set which includes the percentage of variation each PC explains, +##' and additionally how much the variation within that PC is explained by the grouping variable. 
+##' @author Gavin Kelly +plotPCA_vst <- function (object, ntop = 500, assay=length(assays(object))) { + rv <- rowVars(assay(object, assay)) + select <- order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))] + pca <- prcomp(t(assay(object, assay)[select, ]), center=TRUE, scale=FALSE) + percentVar <- pca$sdev^2/sum(pca$sdev^2) + df <- cbind( as.data.frame(colData(object)), pca$x) + #Order points so extreme samples are more likely to get label + ord <- order(abs(rank(df$PC1)-median(df$PC1)), abs(rank(df$PC2)-median(df$PC2))) + df <- df[ord,] + attr(df, "percentVar") <- data.frame(PC=seq(along=percentVar), percentVar=100*percentVar) + return(df) +} + +PlotFile <- paste(opt$outprefix,".plots.pdf",sep="") + +pdf(file=PlotFile, onefile=TRUE, width=7, height=7) +## PCA +ntop <- c(500, Inf) +for (n_top_var in ntop) { + pca.data <- plotPCA_vst(dds, assay=vst_name, ntop=n_top_var) + percentVar <- round(attr(pca.data, "percentVar")$percentVar) + plot_subtitle <- ifelse(n_top_var==Inf, "All genes", paste("Top", n_top_var, "genes")) + pl <- ggplot(pca.data, aes(PC1, PC2, label=paste0(" ", sample, " "))) + + geom_point() + + geom_text(check_overlap=TRUE, vjust=0.5, hjust="inward") + + xlab(paste0("PC1: ",percentVar[1],"% variance")) + + ylab(paste0("PC2: ",percentVar[2],"% variance")) + + labs(title = paste0("First PCs on ", vst_name, "-transformed data"), subtitle = plot_subtitle) + + theme(legend.position="top", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + panel.border = element_rect(colour = "black", fill=NA, size=1)) + print(pl) + + if (decompose) { + pc_names <- paste0("PC", attr(pca.data, "percentVar")$PC) + long_pc <- reshape(pca.data, varying=pc_names, direction="long", sep="", timevar="component", idvar="pcrow") + long_pc <- subset(long_pc, component<=5) + long_pc_grp <- reshape(long_pc, varying=names(groupings), direction="long", sep="", timevar="grouper") + long_pc_grp <- subset(long_pc_grp, 
grouper<=5) + long_pc_grp$component <- paste("PC", long_pc_grp$component) + long_pc_grp$grouper <- paste0(long_pc_grp$grouper, c("st","nd","rd","th","th")[long_pc_grp$grouper], " prefix") + pl <- ggplot(long_pc_grp, aes(x=Group, y=PC)) + + geom_point() + + stat_summary(fun=mean, geom="line", aes(group = 1)) + + labs(x=NULL, y=NULL, subtitle = plot_subtitle, title="PCs split by sample-name prefixes") + + facet_grid(component~grouper, scales="free_x") + + scale_x_discrete(guide = guide_axis(n.dodge = 3)) + print(pl) + } +} # at end of loop, we'll be using the user-defined ntop if any, else all genes + +## WRITE PC1 vs PC2 VALUES TO FILE +pca.vals <- pca.data[,c("PC1","PC2")] +colnames(pca.vals) <- paste0(colnames(pca.vals), ": ", percentVar[1:2], '% variance') +pca.vals <- cbind(sample = rownames(pca.vals), pca.vals) +write.table(pca.vals, file = paste(opt$outprefix, ".pca.vals.txt", sep=""), + row.names = FALSE, col.names = TRUE, sep = "\t", quote = TRUE) + +## SAMPLE CORRELATION HEATMAP +sampleDists <- dist(t(assay(dds, vst_name))) +sampleDistMatrix <- as.matrix(sampleDists) +colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) +pheatmap( + sampleDistMatrix, + clustering_distance_rows=sampleDists, + clustering_distance_cols=sampleDists, + col=colors, + main=paste("Euclidean distance between", vst_name, "of samples") +) + +## WRITE SAMPLE DISTANCES TO FILE +write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix),file=paste(opt$outprefix, ".sample.dists.txt", sep=""), + row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE) +dev.off() + +################################################ +################################################ +## SAVE SIZE FACTORS ## +################################################ +################################################ + +SizeFactorsDir <- "size_factors/" +if (file.exists(SizeFactorsDir) == FALSE) { + dir.create(SizeFactorsDir, recursive=TRUE) +} + +NormFactorsFile <- 
paste(SizeFactorsDir,opt$outprefix, ".size_factors.RData", sep="") + +normFactors <- sizeFactors(dds) +save(normFactors, file=NormFactorsFile) + +for (name in names(sizeFactors(dds))) { + sizeFactorFile <- paste(SizeFactorsDir,name, ".txt", sep="") + write(as.numeric(sizeFactors(dds)[name]), file=sizeFactorFile) +} + +################################################ +################################################ +## R SESSION INFO ## +################################################ +################################################ + +RLogFile <- "R_sessionInfo.log" + +sink(RLogFile) +a <- sessionInfo() +print(a) +sink() + +################################################ +################################################ +################################################ +################################################ diff --git a/bin/featurecounts_deseq2.r b/bin/featurecounts_deseq2.r deleted file mode 100755 index b75335a5b..000000000 --- a/bin/featurecounts_deseq2.r +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env Rscript - -################################################ -################################################ -## REQUIREMENTS ## -################################################ -################################################ - -## DIFFERENTIAL ANALYSIS, SCATTERPLOTS AND PCA FOR SAMPLES IN FEATURECOUNTS FILE - ## - FIRST SIX COLUMNS OF FEATURECOUNTS_FILE SHOULD BE INTERVAL INFO. REMAINDER OF COLUMNS SHOULD BE SAMPLES-SPECIFIC COUNTS. - ## - SAMPLE NAMES HAVE TO END IN "_R1" REPRESENTING REPLICATE ID. LAST 3 CHARACTERS OF SAMPLE NAME WILL BE TRIMMED TO OBTAIN GROUP ID FOR DESEQ2 COMPARISONS. - ## - BAM_SUFFIX IS PORTION OF FILENAME AFTER SAMPLE NAME IN FEATURECOUNTS COLUMN SAMPLE NAMES E.G. 
".rmDup.bam" if "DRUG_R1.rmDup.bam" - ## - PACKAGES BELOW NEED TO BE AVAILABLE TO LOAD WHEN RUNNING R - -################################################ -################################################ -## LOAD LIBRARIES ## -################################################ -################################################ - -library(optparse) -library(DESeq2) -library(vsn) -library(ggplot2) -library(RColorBrewer) -library(pheatmap) -library(lattice) -library(BiocParallel) - -################################################ -################################################ -## PARSE COMMAND-LINE PARAMETERS ## -################################################ -################################################ - -option_list <- list(make_option(c("-i", "--featurecount_file"), type="character", default=NULL, help="Feature count file generated by the SubRead featureCounts command.", metavar="path"), - make_option(c("-b", "--bam_suffix"), type="character", default=NULL, help="Portion of filename after sample name in featurecount file header e.g. 
'.rmDup.bam' if 'DRUG_R1.rmDup.bam'", metavar="string"), - make_option(c("-o", "--outdir"), type="character", default='./', help="Output directory", metavar="path"), - make_option(c("-p", "--outprefix"), type="character", default='differential', help="Output prefix", metavar="string"), - make_option(c("-s", "--outsuffix"), type="character", default='', help="Output suffix for comparison-level results", metavar="string"), - make_option(c("-v", "--vst"), type="logical", default=FALSE, help="Run vst transform instead of rlog", metavar="boolean"), - make_option(c("-c", "--cores"), type="integer", default=1, help="Number of cores", metavar="integer")) - -opt_parser <- OptionParser(option_list=option_list) -opt <- parse_args(opt_parser) - -if (is.null(opt$featurecount_file)){ - print_help(opt_parser) - stop("Please provide featurecount file.", call.=FALSE) -} -if (is.null(opt$bam_suffix)){ - print_help(opt_parser) - stop("Please provide bam suffix in header of featurecount file.", call.=FALSE) -} - -################################################ -################################################ -## READ IN COUNTS FILE ## -################################################ -################################################ - -count.table <- read.delim(file=opt$featurecount_file,header=TRUE,skip=1) -colnames(count.table) <- gsub(opt$bam_suffix,"",colnames(count.table)) -colnames(count.table) <- as.character(lapply(colnames(count.table), function (x) tail(strsplit(x,'.',fixed=TRUE)[[1]],1))) -rownames(count.table) <- count.table$Geneid -interval.table <- count.table[,1:6] -count.table <- count.table[,7:ncol(count.table),drop=FALSE] - -################################################ -################################################ -## RUN DESEQ2 ## -################################################ -################################################ - -if (file.exists(opt$outdir) == FALSE) { - dir.create(opt$outdir,recursive=TRUE) -} -setwd(opt$outdir) - -samples.vec <- 
sort(colnames(count.table)) -groups <- sub("_[^_]+$", "", samples.vec) -print(unique(groups)) -if (length(unique(groups)) == 1) { - quit(save = "no", status = 0, runLast = FALSE) -} - -DDSFile <- paste(opt$outprefix,".dds.rld.RData",sep="") -if (file.exists(DDSFile) == FALSE) { - counts <- count.table[,samples.vec,drop=FALSE] - coldata <- data.frame(row.names=colnames(counts),condition=groups) - dds <- DESeqDataSetFromMatrix(countData = round(counts), colData = coldata, design = ~ condition) - dds <- DESeq(dds, parallel=TRUE, BPPARAM=MulticoreParam(opt$cores)) - if (!opt$vst) { - rld <- rlog(dds) - } else { - rld <- vst(dds) - } - save(dds,rld,file=DDSFile) -} - -################################################ -################################################ -## PLOT QC ## -################################################ -################################################ - -PlotFile <- paste(opt$outprefix,".plots.pdf",sep="") -if (file.exists(PlotFile) == FALSE) { - pdf(file=PlotFile,onefile=TRUE,width=7,height=7) - - ## PCA - pca.data <- DESeq2::plotPCA(rld,intgroup=c("condition"),returnData=TRUE) - percentVar <- round(100 * attr(pca.data, "percentVar")) - plot <- ggplot(pca.data, aes(PC1, PC2, color=condition)) + - geom_point(size=3) + - xlab(paste0("PC1: ",percentVar[1],"% variance")) + - ylab(paste0("PC2: ",percentVar[2],"% variance")) + - theme(panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - panel.border = element_rect(colour = "black", fill=NA, size=1)) - print(plot) - - ## WRITE PC1 vs PC2 VALUES TO FILE - pca.vals <- pca.data[,1:2] - colnames(pca.vals) <- paste(colnames(pca.vals),paste(percentVar,'% variance',sep=""), sep=": ") - pca.vals <- cbind(sample = rownames(pca.vals), pca.vals) - write.table(pca.vals,file=paste(opt$outprefix,".pca.vals.txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=TRUE) - - ## SAMPLE CORRELATION HEATMAP - sampleDists <- dist(t(assay(rld))) - 
sampleDistMatrix <- as.matrix(sampleDists) - colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) - pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors) - - ## WRITE SAMPLE DISTANCES TO FILE - write.table(cbind(sample = rownames(sampleDistMatrix), sampleDistMatrix),file=paste(opt$outprefix,".sample.dists.txt",sep=""),row.names=FALSE,col.names=TRUE,sep="\t",quote=FALSE) - - dev.off() -} - -################################################ -################################################ -## SAVE SIZE FACTORS ## -################################################ -################################################ - -SizeFactorsDir <- "sizeFactors/" -if (file.exists(SizeFactorsDir) == FALSE) { - dir.create(SizeFactorsDir,recursive=TRUE) -} - -NormFactorsFile <- paste(SizeFactorsDir,opt$outprefix,".sizeFactors.RData",sep="") -if (file.exists(NormFactorsFile) == FALSE) { - normFactors <- sizeFactors(dds) - save(normFactors,file=NormFactorsFile) - - for (name in names(sizeFactors(dds))) { - sizeFactorFile <- paste(SizeFactorsDir,name,opt$outsuffix,".sizeFactor.txt",sep="") - if (file.exists(sizeFactorFile) == FALSE) { - write(as.numeric(sizeFactors(dds)[name]),file=sizeFactorFile) - } - } -} - -################################################ -################################################ -## WRITE LOG FILE ## -################################################ -################################################ - -LogFile <- paste(opt$outprefix,".log",sep="") -if (file.exists(LogFile) == FALSE) { - cat("\nSamples =",samples.vec,"\n\n",file=LogFile,append=TRUE,sep=', ') - cat("Groups =",groups,"\n\n",file=LogFile,append=TRUE,sep=', ') - cat("Dimensions of count matrix =",dim(counts),"\n\n",file=LogFile,append=FALSE,sep=' ') - cat("\n",file=LogFile,append=TRUE,sep='') -} - -################################################ -################################################ -## LOOP THROUGH COMPARISONS ## 
-################################################ -################################################ - -ResultsFile <- paste(opt$outprefix,".results.txt",sep="") -if (file.exists(ResultsFile) == FALSE) { - - raw.counts <- counts(dds,normalized=FALSE) - colnames(raw.counts) <- paste(colnames(raw.counts),'raw',sep='.') - pseudo.counts <- counts(dds,normalized=TRUE) - colnames(pseudo.counts) <- paste(colnames(pseudo.counts),'pseudo',sep='.') - - deseq2_results_list <- list() - comparisons <- combn(unique(groups),2) - for (idx in 1:ncol(comparisons)) { - - control.group <- comparisons[1,idx] - treat.group <- comparisons[2,idx] - CompPrefix <- paste(control.group,treat.group,sep="vs") - cat("Saving results for ",CompPrefix," ...\n",sep="") - - CompOutDir <- paste(CompPrefix,'/',sep="") - if (file.exists(CompOutDir) == FALSE) { - dir.create(CompOutDir,recursive=TRUE) - } - - control.samples <- samples.vec[which(groups == control.group)] - treat.samples <- samples.vec[which(groups == treat.group)] - comp.samples <- c(control.samples,treat.samples) - - comp.results <- results(dds,contrast=c("condition",c(control.group,treat.group))) - comp.df <- as.data.frame(comp.results) - comp.table <- cbind(interval.table, as.data.frame(comp.df), raw.counts[,paste(comp.samples,'raw',sep='.')], pseudo.counts[,paste(comp.samples,'pseudo',sep='.')]) - - ## WRITE RESULTS FILE - CompResultsFile <- paste(CompOutDir,CompPrefix,opt$outsuffix,".deseq2.results.txt",sep="") - write.table(comp.table, file=CompResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) - - ## FILTER RESULTS BY FDR & LOGFC AND WRITE RESULTS FILE - pdf(file=paste(CompOutDir,CompPrefix,opt$outsuffix,".deseq2.plots.pdf",sep=""),width=10,height=8) - if (length(comp.samples) > 2) { - for (MIN_FDR in c(0.01,0.05)) { - - ## SUBSET RESULTS BY FDR - pass.fdr.table <- subset(comp.table, padj < MIN_FDR) - pass.fdr.up.table <- subset(comp.table, padj < MIN_FDR & log2FoldChange > 0) - pass.fdr.down.table <- 
subset(comp.table, padj < MIN_FDR & log2FoldChange < 0) - - ## SUBSET RESULTS BY FDR AND LOGFC - pass.fdr.logFC.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1) - pass.fdr.logFC.up.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1 & log2FoldChange > 0) - pass.fdr.logFC.down.table <- subset(comp.table, padj < MIN_FDR & abs(log2FoldChange) >= 1 & log2FoldChange < 0) - - ## WRITE RESULTS FILE - CompResultsFile <- paste(CompOutDir,CompPrefix,opt$outsuffix,".deseq2.FDR",MIN_FDR,".results.txt",sep="") - CompBEDFile <- paste(CompOutDir,CompPrefix,opt$outsuffix,".deseq2.FDR",MIN_FDR,".results.bed",sep="") - write.table(pass.fdr.table, file=CompResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) - write.table(pass.fdr.table[,c("Chr","Start","End","Geneid","log2FoldChange","Strand")], file=CompBEDFile, col.names=FALSE, row.names=FALSE, sep='\t', quote=FALSE) - - ## MA PLOT & VOLCANO PLOT - DESeq2::plotMA(comp.results, main=paste("MA plot FDR <= ",MIN_FDR,sep=""), ylim=c(-2,2),alpha=MIN_FDR) - plot(comp.table$log2FoldChange, -1*log10(comp.table$padj), col=ifelse(comp.table$padj<=MIN_FDR, "red", "black"), xlab="logFC", ylab="-1*log10(FDR)", main=paste("Volcano plot FDR <=",MIN_FDR,sep=" "), pch=20) - - ## ADD COUNTS TO LOGFILE - cat(CompPrefix," genes with FDR <= ",MIN_FDR,": ",nrow(pass.fdr.table)," (up=",nrow(pass.fdr.up.table),", down=",nrow(pass.fdr.down.table),")","\n",file=LogFile,append=TRUE,sep="") - cat(CompPrefix," genes with FDR <= ",MIN_FDR," & FC > 2: ",nrow(pass.fdr.logFC.table)," (up=",nrow(pass.fdr.logFC.up.table),", down=",nrow(pass.fdr.logFC.down.table),")","\n",file=LogFile,append=TRUE,sep="") - - } - cat("\n",file=LogFile,append=TRUE,sep="") - } - - ## SAMPLE CORRELATION HEATMAP - rld.subset <- assay(rld)[,comp.samples] - sampleDists <- dist(t(rld.subset)) - sampleDistMatrix <- as.matrix(sampleDists) - colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) - 
pheatmap(sampleDistMatrix,clustering_distance_rows=sampleDists,clustering_distance_cols=sampleDists,col=colors) - - ## SCATTER PLOT FOR RLOG COUNTS - combs <- combn(comp.samples,2,simplify=FALSE) - clabels <- sapply(combs,function(x){paste(x,collapse=' & ')}) - plotdat <- data.frame(x=unlist(lapply(combs, function(x){rld.subset[, x[1] ]})),y=unlist(lapply(combs, function(y){rld.subset[, y[2] ]})),comp=rep(clabels, each=nrow(rld.subset))) - plot <- xyplot(y~x|comp,plotdat, - panel=function(...){ - panel.xyplot(...) - panel.abline(0,1,col="red") - }, - par.strip.text=list(cex=0.5)) - print(plot) - dev.off() - - colnames(comp.df) <- paste(CompPrefix,".",colnames(comp.df),sep="") - deseq2_results_list[[idx]] <- comp.df - - } - - ## WRITE RESULTS FROM ALL COMPARISONS TO FILE - deseq2_results_table <- cbind(interval.table,do.call(cbind, deseq2_results_list),raw.counts,pseudo.counts) - write.table(deseq2_results_table, file=ResultsFile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE) - -} - -################################################ -################################################ -## R SESSION INFO ## -################################################ -################################################ - -RLogFile <- "R_sessionInfo.log" -if (file.exists(RLogFile) == FALSE) { - sink(RLogFile) - a <- sessionInfo() - print(a) - sink() -} - -################################################ -################################################ -################################################ -################################################ diff --git a/bin/gtf2bed b/bin/gtf2bed index c2a8bbeee..66d523067 100755 --- a/bin/gtf2bed +++ b/bin/gtf2bed @@ -1,4 +1,4 @@ -#!/usr/bin/perl +#!/usr/bin/env perl # Copyright (c) 2011 Erik Aronesty (erik@q32.com) # @@ -33,90 +33,90 @@ my $in_cmd =($in =~ /\.gz$/ ? "gunzip -c $in|" : $in =~ /\.zip$/ ? 
"unzip -p $in open IN, $in_cmd; while () { - $gff = 2 if /^##gff-version 2/; - $gff = 3 if /^##gff-version 3/; - next if /^#/ && $gff; - - s/\s+$//; - # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr - my @f = split /\t/; - if ($gff) { + $gff = 2 if /^##gff-version 2/; + $gff = 3 if /^##gff-version 3/; + next if /^#/ && $gff; + + s/\s+$//; + # 0-chr 1-src 2-feat 3-beg 4-end 5-scor 6-dir 7-fram 8-attr + my @f = split /\t/; + if ($gff) { # most ver 2's stick gene names in the id field - ($id) = $f[8]=~ /\bID="([^"]+)"/; + ($id) = $f[8]=~ /\bID="([^"]+)"/; # most ver 3's stick unquoted names in the name field - ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; - } else { - ($id) = $f[8]=~ /transcript_id "([^"]+)"/; - } + ($id) = $f[8]=~ /\bName=([^";]+)/ if !$id && $gff == 3; + } else { + ($id) = $f[8]=~ /transcript_id "([^"]+)"/; + } - next unless $id && $f[0]; + next unless $id && $f[0]; - if ($f[2] eq 'exon') { - die "no position at exon on line $." if ! $f[3]; + if ($f[2] eq 'exon') { + die "no position at exon on line $." if ! $f[3]; # gff3 puts :\d in exons sometimes $id =~ s/:\d+$// if $gff == 3; - push @{$exons{$id}}, \@f; - # save lowest start - $trans{$id} = \@f if !$trans{$id}; - } elsif ($f[2] eq 'start_codon') { - #optional, output codon start/stop as "thick" region in bed - $sc{$id}->[0] = $f[3]; - } elsif ($f[2] eq 'stop_codon') { - $sc{$id}->[1] = $f[4]; - } elsif ($f[2] eq 'miRNA' ) { - $trans{$id} = \@f if !$trans{$id}; - push @{$exons{$id}}, \@f; - } + push @{$exons{$id}}, \@f; + # save lowest start + $trans{$id} = \@f if !$trans{$id}; + } elsif ($f[2] eq 'start_codon') { + #optional, output codon start/stop as "thick" region in bed + $sc{$id}->[0] = $f[3]; + } elsif ($f[2] eq 'stop_codon') { + $sc{$id}->[1] = $f[4]; + } elsif ($f[2] eq 'miRNA' ) { + $trans{$id} = \@f if !$trans{$id}; + push @{$exons{$id}}, \@f; + } } for $id ( - # sort by chr then pos - sort { - $trans{$a}->[0] eq $trans{$b}->[0] ? 
- $trans{$a}->[3] <=> $trans{$b}->[3] : - $trans{$a}->[0] cmp $trans{$b}->[0] - } (keys(%trans)) ) { - my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; + # sort by chr then pos + sort { + $trans{$a}->[0] eq $trans{$b}->[0] ? + $trans{$a}->[3] <=> $trans{$b}->[3] : + $trans{$a}->[0] cmp $trans{$b}->[0] + } (keys(%trans)) ) { + my ($chr, undef, undef, undef, undef, undef, $dir, undef, $attr, undef, $cds, $cde) = @{$trans{$id}}; my ($cds, $cde); ($cds, $cde) = @{$sc{$id}} if $sc{$id}; - # sort by pos - my @ex = sort { - $a->[3] <=> $b->[3] - } @{$exons{$id}}; - - my $beg = $ex[0][3]; - my $end = $ex[-1][4]; - - if ($dir eq '-') { - # swap - $tmp=$cds; - $cds=$cde; - $cde=$tmp; - $cds -= 2 if $cds; - $cde += 2 if $cde; - } - - # not specified, just use exons - $cds = $beg if !$cds; - $cde = $end if !$cde; - - # adjust start for bed - --$beg; --$cds; - - my $exn = @ex; # exon count - my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start - my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size + # sort by pos + my @ex = sort { + $a->[3] <=> $b->[3] + } @{$exons{$id}}; + + my $beg = $ex[0][3]; + my $end = $ex[-1][4]; + + if ($dir eq '-') { + # swap + $tmp=$cds; + $cds=$cde; + $cde=$tmp; + $cds -= 2 if $cds; + $cde += 2 if $cde; + } + + # not specified, just use exons + $cds = $beg if !$cds; + $cde = $end if !$cde; + + # adjust start for bed + --$beg; --$cds; + + my $exn = @ex; # exon count + my $exst = join ",", map {$_->[3]-$beg-1} @ex; # exon start + my $exsz = join ",", map {$_->[4]-$_->[3]+1} @ex; # exon size my $gene_id; my $extend = ""; if ($extended) { - ($gene_id) = $attr =~ /gene_name "([^"]+)"/; - ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; + ($gene_id) = $attr =~ /gene_name "([^"]+)"/; + ($gene_id) = $attr =~ /gene_id "([^"]+)"/ unless $gene_id; $extend="\t$gene_id"; } - # added an extra comma to make it look exactly like ucsc's beds - print 
"$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; + # added an extra comma to make it look exactly like ucsc's beds + print "$chr\t$beg\t$end\t$id\t0\t$dir\t$cds\t$cde\t0\t$exn\t$exsz,\t$exst,$extend\n"; } diff --git a/bin/igv_files_to_session.py b/bin/igv_files_to_session.py index 48e749c8e..629e6cc91 100755 --- a/bin/igv_files_to_session.py +++ b/bin/igv_files_to_session.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 ####################################################################### ####################################################################### @@ -22,12 +22,29 @@ argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument('XML_OUT', help="XML output file.") -argParser.add_argument('LIST_FILE', help="Tab-delimited file containing two columns i.e. file_name\tcolour. Header isnt required.") -argParser.add_argument('GENOME', help="Full path to genome fasta file or shorthand for genome available in IGV e.g. hg19.") +argParser.add_argument("XML_OUT", help="XML output file.") +argParser.add_argument( + "LIST_FILE", + help="Tab-delimited file containing two columns i.e. file_name\tcolour. Header isnt required.", +) +argParser.add_argument( + "REPLACE_FILE", + help="Tab-delimited file containing two columns i.e. file_name\treplacement_file_name. Header isnt required.", +) +argParser.add_argument( + "GENOME", + help="Full path to genome fasta file or shorthand for genome available in IGV e.g. 
hg19.", +) ## OPTIONAL PARAMETERS -argParser.add_argument('-pp', '--path_prefix', type=str, dest="PATH_PREFIX", default='', help="Path prefix to be added at beginning of all files in input list file.") +argParser.add_argument( + "-pp", + "--path_prefix", + type=str, + dest="PATH_PREFIX", + default="", + help="Path prefix to be added at beginning of all files in input list file.", +) args = argParser.parse_args() ############################################ @@ -36,6 +53,7 @@ ############################################ ############################################ + def makedir(path): if not len(path) == 0: @@ -45,76 +63,125 @@ def makedir(path): if exception.errno != errno.EEXIST: raise + ############################################ ############################################ ## MAIN FUNCTION ############################################ ############################################ -def igv_files_to_session(XMLOut,ListFile,Genome,PathPrefix=''): + +def igv_files_to_session(XMLOut, ListFile, ReplaceFile, Genome, PathPrefix=""): makedir(os.path.dirname(XMLOut)) + replaceFileDict = {} + fin = open(ReplaceFile, "r") + while True: + line = fin.readline() + if line: + ofile, rfile = line.strip().split("\t") + replaceFileDict[ofile] = rfile + else: + break + fin.close() fileList = [] - fin = open(ListFile,'r') + fin = open(ListFile, "r") while True: line = fin.readline() if line: - ifile,colour = line.strip().split('\t') + ifile, colour = line.strip().split("\t") if len(colour.strip()) == 0: - colour = '0,0,178' - fileList.append((PathPrefix.strip()+ifile,colour)) + colour = "0,0,178" + for ofile, rfile in replaceFileDict.items(): + if ofile in ifile: + ifile = ifile.replace(ofile, rfile) + fileList.append((PathPrefix.strip() + ifile, colour)) else: break - fout.close() + fin.close() + fout = open("igv_files.txt", "w") + for ifile, colour in fileList: + fout.write(ifile + "\n") + fout.close() ## ADD RESOURCES SECTION - XMLStr = '\n' + XMLStr = '\n' XMLStr += '\n' % 
(Genome) - XMLStr += '\t\n' - for ifile,colour in fileList: + XMLStr += "\t\n" + for ifile, colour in fileList: XMLStr += '\t\t\n' % (ifile) - XMLStr += '\t\n' + XMLStr += "\t\n" ## ADD PANEL SECTION XMLStr += '\t\n' - for ifile,colour in fileList: + for ifile, colour in fileList: extension = os.path.splitext(ifile)[1].lower() - if extension in ['.bed','.broadpeak','.narrowpeak']: - XMLStr += '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + elif extension in [".bw", ".bigwig", ".tdf"]: + XMLStr += ( + '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) XMLStr += '\t\t\t\n' - XMLStr += '\t\t\n' - elif extension in ['.gtf']: - XMLStr += '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + elif extension in [".bam"]: pass else: - XMLStr += '\t\t\n' + % (ifile, os.path.basename(ifile)) + ) + XMLStr += "\t\n" + # XMLStr += '\t\n\t\t\n\t\t\n\t\t\n\t\n' + XMLStr += "" + XMLOut = open(XMLOut, "w") XMLOut.write(XMLStr) XMLOut.close() + ############################################ ############################################ ## RUN FUNCTION ############################################ ############################################ -igv_files_to_session(XMLOut=args.XML_OUT,ListFile=args.LIST_FILE,Genome=args.GENOME,PathPrefix=args.PATH_PREFIX) +igv_files_to_session( + XMLOut=args.XML_OUT, + ListFile=args.LIST_FILE, + ReplaceFile=args.REPLACE_FILE, + Genome=args.GENOME, + PathPrefix=args.PATH_PREFIX, +) ############################################ ############################################ diff --git a/bin/macs2_merged_expand.py b/bin/macs2_merged_expand.py index f4e84a14d..aa401123f 100755 --- a/bin/macs2_merged_expand.py +++ b/bin/macs2_merged_expand.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 ####################################################################### ####################################################################### @@ -16,19 +16,35 @@ ############################################ ############################################ 
-Description = 'Add sample boolean files and aggregate columns from merged MACS narrow or broad peak file.' +Description = "Add sample boolean files and aggregate columns from merged MACS narrow or broad peak file." Epilog = """Example usage: python macs2_merged_expand.py --is_narrow_peak --min_replicates 1""" argParser = argparse.ArgumentParser(description=Description, epilog=Epilog) ## REQUIRED PARAMETERS -argParser.add_argument('MERGED_INTERVAL_FILE', help="Merged MACS2 interval file created using linux sort and mergeBed.") -argParser.add_argument('SAMPLE_NAME_LIST', help="Comma-separated list of sample names as named in individual MACS2 broadPeak/narrowPeak output file e.g. SAMPLE_R1 for SAMPLE_R1_peak_1.") -argParser.add_argument('OUTFILE', help="Full path to output directory.") +argParser.add_argument("MERGED_INTERVAL_FILE", help="Merged MACS2 interval file created using linux sort and mergeBed.") +argParser.add_argument( + "SAMPLE_NAME_LIST", + help="Comma-separated list of sample names as named in individual MACS2 broadPeak/narrowPeak output file e.g. 
SAMPLE_R1 for SAMPLE_R1_peak_1.", +) +argParser.add_argument("OUTFILE", help="Full path to output directory.") ## OPTIONAL PARAMETERS -argParser.add_argument('-in', '--is_narrow_peak', dest="IS_NARROW_PEAK", help="Whether merged interval file was generated from narrow or broad peak files (default: False).",action='store_true') -argParser.add_argument('-mr', '--min_replicates', type=int, dest="MIN_REPLICATES", default=1, help="Minumum number of replicates per sample required to contribute to merged peak (default: 1).") +argParser.add_argument( + "-in", + "--is_narrow_peak", + dest="IS_NARROW_PEAK", + help="Whether merged interval file was generated from narrow or broad peak files (default: False).", + action="store_true", +) +argParser.add_argument( + "-mr", + "--min_replicates", + type=int, + dest="MIN_REPLICATES", + default=1, + help="Minumum number of replicates per sample required to contribute to merged peak (default: 1).", +) args = argParser.parse_args() ############################################ @@ -37,6 +53,7 @@ ############################################ ############################################ + def makedir(path): if not len(path) == 0: @@ -46,6 +63,7 @@ def makedir(path): if exception.errno != errno.EEXIST: raise + ############################################ ############################################ ## MAIN FUNCTION @@ -58,36 +76,50 @@ def makedir(path): ## 2) narrowPeak ## sort -k1,1 -k2,2n | mergeBed -c 2,3,4,5,6,7,8,9,10 -o collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse,collapse > merged_peaks.txt -def macs2_merged_expand(MergedIntervalTxtFile,SampleNameList,OutFile,isNarrow=False,minReplicates=1): + +def macs2_merged_expand(MergedIntervalTxtFile, SampleNameList, OutFile, isNarrow=False, minReplicates=1): makedir(os.path.dirname(OutFile)) combFreqDict = {} totalOutIntervals = 0 SampleNameList = sorted(SampleNameList) - fin = open(MergedIntervalTxtFile,'r') - fout = open(OutFile,'w') - oFields = 
['chr','start','end','interval_id','num_peaks','num_samples'] + [x+'.bool' for x in SampleNameList] + [x+'.fc' for x in SampleNameList] + [x+'.qval' for x in SampleNameList] + [x+'.pval' for x in SampleNameList] + [x+'.start' for x in SampleNameList] + [x+'.end' for x in SampleNameList] + fin = open(MergedIntervalTxtFile, "r") + fout = open(OutFile, "w") + oFields = ( + ["chr", "start", "end", "interval_id", "num_peaks", "num_samples"] + + [x + ".bool" for x in SampleNameList] + + [x + ".fc" for x in SampleNameList] + + [x + ".qval" for x in SampleNameList] + + [x + ".pval" for x in SampleNameList] + + [x + ".start" for x in SampleNameList] + + [x + ".end" for x in SampleNameList] + ) if isNarrow: - oFields += [x+'.summit' for x in SampleNameList] - fout.write('\t'.join(oFields) + '\n') + oFields += [x + ".summit" for x in SampleNameList] + fout.write("\t".join(oFields) + "\n") while True: line = fin.readline() if line: - lspl = line.strip().split('\t') - - chromID = lspl[0]; mstart = int(lspl[1]); mend = int(lspl[2]); - starts = [int(x) for x in lspl[3].split(',')]; ends = [int(x) for x in lspl[4].split(',')] - names = lspl[5].split(','); fcs = [float(x) for x in lspl[8].split(',')] - pvals = [float(x) for x in lspl[9].split(',')]; qvals = [float(x) for x in lspl[10].split(',')] + lspl = line.strip().split("\t") + + chromID = lspl[0] + mstart = int(lspl[1]) + mend = int(lspl[2]) + starts = [int(x) for x in lspl[3].split(",")] + ends = [int(x) for x in lspl[4].split(",")] + names = lspl[5].split(",") + fcs = [float(x) for x in lspl[8].split(",")] + pvals = [float(x) for x in lspl[9].split(",")] + qvals = [float(x) for x in lspl[10].split(",")] summits = [] if isNarrow: - summits = [int(x) for x in lspl[11].split(',')] + summits = [int(x) for x in lspl[11].split(",")] ## GROUP SAMPLES BY REMOVING TRAILING *_R* groupDict = {} - for sID in ['_'.join(x.split('_')[:-2]) for x in names]: - gID = '_'.join(sID.split('_')[:-1]) + for sID in ["_".join(x.split("_")[:-2]) for 
x in names]: + gID = "_".join(sID.split("_")[:-1]) if gID not in groupDict: groupDict[gID] = [] if sID not in groupDict[gID]: @@ -95,14 +127,19 @@ def macs2_merged_expand(MergedIntervalTxtFile,SampleNameList,OutFile,isNarrow=Fa ## GET SAMPLES THAT PASS REPLICATE THRESHOLD passRepThreshList = [] - for gID,sIDs in groupDict.items(): + for gID, sIDs in groupDict.items(): if len(sIDs) >= minReplicates: passRepThreshList += sIDs ## GET VALUES FROM INDIVIDUAL PEAK SETS - fcDict = {}; qvalDict = {}; pvalDict = {}; startDict = {}; endDict = {}; summitDict = {} + fcDict = {} + qvalDict = {} + pvalDict = {} + startDict = {} + endDict = {} + summitDict = {} for idx in range(len(names)): - sample = '_'.join(names[idx].split('_')[:-2]) + sample = "_".join(names[idx].split("_")[:-2]) if sample in passRepThreshList: if sample not in fcDict: fcDict[sample] = [] @@ -127,16 +164,25 @@ def macs2_merged_expand(MergedIntervalTxtFile,SampleNameList,OutFile,isNarrow=Fa samples = sorted(fcDict.keys()) if samples != []: numSamples = len(samples) - boolList = ['TRUE' if x in samples else 'FALSE' for x in SampleNameList] - fcList = [';'.join(fcDict[x]) if x in samples else 'NA' for x in SampleNameList] - qvalList = [';'.join(qvalDict[x]) if x in samples else 'NA' for x in SampleNameList] - pvalList = [';'.join(pvalDict[x]) if x in samples else 'NA' for x in SampleNameList] - startList = [';'.join(startDict[x]) if x in samples else 'NA' for x in SampleNameList] - endList = [';'.join(endDict[x]) if x in samples else 'NA' for x in SampleNameList] - oList = [str(x) for x in [chromID,mstart,mend,'Interval_'+str(totalOutIntervals+1),len(names),numSamples]+boolList+fcList+qvalList+pvalList+startList+endList] + boolList = ["TRUE" if x in samples else "FALSE" for x in SampleNameList] + fcList = [";".join(fcDict[x]) if x in samples else "NA" for x in SampleNameList] + qvalList = [";".join(qvalDict[x]) if x in samples else "NA" for x in SampleNameList] + pvalList = [";".join(pvalDict[x]) if x in 
samples else "NA" for x in SampleNameList] + startList = [";".join(startDict[x]) if x in samples else "NA" for x in SampleNameList] + endList = [";".join(endDict[x]) if x in samples else "NA" for x in SampleNameList] + oList = [ + str(x) + for x in [chromID, mstart, mend, "Interval_" + str(totalOutIntervals + 1), len(names), numSamples] + + boolList + + fcList + + qvalList + + pvalList + + startList + + endList + ] if isNarrow: - oList += [';'.join(summitDict[x]) if x in samples else 'NA' for x in SampleNameList] - fout.write('\t'.join(oList) + '\n') + oList += [";".join(summitDict[x]) if x in samples else "NA" for x in SampleNameList] + fout.write("\t".join(oList) + "\n") tsamples = tuple(sorted(samples)) if tsamples not in combFreqDict: @@ -151,19 +197,26 @@ def macs2_merged_expand(MergedIntervalTxtFile,SampleNameList,OutFile,isNarrow=Fa ## WRITE FILE FOR INTERVAL INTERSECT ACROSS SAMPLES. ## COMPATIBLE WITH UPSETR PACKAGE. - fout = open(OutFile[:-4]+'.intersect.txt','w') - combFreqItems = sorted([(combFreqDict[x],x) for x in combFreqDict.keys()],reverse=True) - for k,v in combFreqItems: - fout.write('%s\t%s\n' % ('&'.join(v),k)) + fout = open(OutFile[:-4] + ".intersect.txt", "w") + combFreqItems = sorted([(combFreqDict[x], x) for x in combFreqDict.keys()], reverse=True) + for k, v in combFreqItems: + fout.write("%s\t%s\n" % ("&".join(v), k)) fout.close() + ############################################ ############################################ ## RUN FUNCTION ############################################ ############################################ -macs2_merged_expand(MergedIntervalTxtFile=args.MERGED_INTERVAL_FILE,SampleNameList=args.SAMPLE_NAME_LIST.split(','),OutFile=args.OUTFILE,isNarrow=args.IS_NARROW_PEAK,minReplicates=args.MIN_REPLICATES) +macs2_merged_expand( + MergedIntervalTxtFile=args.MERGED_INTERVAL_FILE, + SampleNameList=args.SAMPLE_NAME_LIST.split(","), + OutFile=args.OUTFILE, + isNarrow=args.IS_NARROW_PEAK, + minReplicates=args.MIN_REPLICATES, +) 
############################################ ############################################ diff --git a/bin/markdown_to_html.py b/bin/markdown_to_html.py deleted file mode 100755 index 57cc4263f..000000000 --- a/bin/markdown_to_html.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -import argparse -import markdown -import os -import sys - -def convert_markdown(in_fn): - input_md = open(in_fn, mode="r", encoding="utf-8").read() - html = markdown.markdown( - "[TOC]\n" + input_md, - extensions = [ - 'pymdownx.extra', - 'pymdownx.b64', - 'pymdownx.highlight', - 'pymdownx.emoji', - 'pymdownx.tilde', - 'toc' - ], - extension_configs = { - 'pymdownx.b64': { - 'base_path': os.path.dirname(in_fn) - }, - 'pymdownx.highlight': { - 'noclasses': True - }, - 'toc': { - 'title': 'Table of Contents' - } - } - ) - return html - -def wrap_html(contents): - header = """ - - - - - -
- """ - footer = """ -
- - - """ - return header + contents + footer - - -def parse_args(args=None): - parser = argparse.ArgumentParser() - parser.add_argument('mdfile', type=argparse.FileType('r'), nargs='?', - help='File to convert. Defaults to stdin.') - parser.add_argument('-o', '--out', type=argparse.FileType('w'), - default=sys.stdout, - help='Output file name. Defaults to stdout.') - return parser.parse_args(args) - -def main(args=None): - args = parse_args(args) - converted_md = convert_markdown(args.mdfile.name) - html = wrap_html(converted_md) - args.out.write(html) - -if __name__ == '__main__': - sys.exit(main()) diff --git a/bin/plot_homer_annotatepeaks.r b/bin/plot_homer_annotatepeaks.r index 4a867d8f8..fc2096eb9 100755 --- a/bin/plot_homer_annotatepeaks.r +++ b/bin/plot_homer_annotatepeaks.r @@ -115,52 +115,52 @@ pdf(PlotFile,height=6,width=3*length(HomerFiles)) ## FEATURE COUNT STACKED BARPLOT plot <- ggplot(plot.feature.dat, aes(x=variable, y=value, group=feature)) + - geom_bar(stat="identity", position = "fill", aes(colour=feature,fill=feature), alpha = 0.3) + - xlab("") + - ylab("% Feature") + - ggtitle("Peak Location Relative to Annotation") + - scale_y_continuous(labels = percent_format()) + - theme(panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + geom_bar(stat="identity", position = "fill", aes(colour=feature,fill=feature), alpha = 0.3) + + xlab("") + + ylab("% Feature") + + ggtitle("Peak Location Relative to Annotation") + + scale_y_continuous(labels = percent_format()) + + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + 
axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) print(plot) ## DISTANCE TO CLOSEST GENE ACROSS ALL PEAKS STACKED BARPLOT plot <- ggplot(plot.dist.dat, aes(x=variable, y=value, group=distance)) + - geom_bar(stat="identity", position = "fill", aes(colour=distance,fill=distance), alpha = 0.3) + - xlab("") + - ylab("% Unique genes to closest peak") + - ggtitle("Distance of Closest Peak to Gene") + - scale_y_continuous(labels = percent_format()) + - theme(panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + geom_bar(stat="identity", position = "fill", aes(colour=distance,fill=distance), alpha = 0.3) + + xlab("") + + ylab("% Unique genes to closest peak") + + ggtitle("Distance of Closest Peak to Gene") + + scale_y_continuous(labels = percent_format()) + + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) print(plot) ## VIOLIN PLOT OF PEAK DISTANCE TO TSS plot <- ggplot(plot.dat, aes(x=name, y=Distance.to.TSS)) + - geom_violin(aes(colour=name,fill=name), alpha = 0.3) + - geom_boxplot(width=0.1) + - xlab("") + - ylab(expression(log[10]*" distance to TSS")) + - ggtitle("Peak Distribution Relative to TSS") + - scale_y_continuous(trans='log10',breaks = trans_breaks("log10", 
function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + - theme(legend.position="none", - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + geom_violin(aes(colour=name,fill=name), alpha = 0.3) + + geom_boxplot(width=0.1) + + xlab("") + + ylab(expression(log[10]*" distance to TSS")) + + ggtitle("Peak Distribution Relative to TSS") + + scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) + + theme(legend.position="none", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) print(plot) dev.off() diff --git a/bin/plot_macs_qc.r b/bin/plot_macs2_qc.r similarity index 77% rename from bin/plot_macs_qc.r rename to bin/plot_macs2_qc.r index b8e25d567..5cf074de6 100755 --- a/bin/plot_macs_qc.r +++ b/bin/plot_macs2_qc.r @@ -96,19 +96,19 @@ write.table(summary.dat,file=SummaryFile,quote=FALSE,sep="\t",row.names=FALSE,co violin.plot <- function(plot.dat,x,y,ylab,title,log) { plot <- ggplot(plot.dat, aes_string(x=x, y=y)) + - geom_violin(aes_string(colour=x,fill=x), alpha = 0.3) + - geom_boxplot(width=0.1) + - xlab("") + - ylab(ylab) + - ggtitle(title) + - theme(legend.position="none", - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - 
axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + geom_violin(aes_string(colour=x,fill=x), alpha = 0.3) + + geom_boxplot(width=0.1) + + xlab("") + + ylab(ylab) + + ggtitle(title) + + theme(legend.position="none", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) if (log == 10) { plot <- plot + scale_y_continuous(trans='log10',breaks = trans_breaks("log10", function(x) 10^x), labels = trans_format("log10", math_format(10^.x))) } @@ -127,19 +127,19 @@ pdf(PlotFile,height=6,width=3*length(unique(plot.dat$name))) peak.count.dat <- as.data.frame(table(plot.dat$name)) colnames(peak.count.dat) <- c("name","count") plot <- ggplot(peak.count.dat, aes(x=name, y=count)) + - geom_bar(stat="identity",aes(colour=name,fill=name), position = "dodge", width = 0.8, alpha = 0.3) + - xlab("") + - ylab("Number of peaks") + - ggtitle("Peak count") + - theme(legend.position="none", - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - panel.background = element_blank(), - axis.text.y = element_text(colour="black"), - axis.text.x= element_text(colour="black",face="bold"), - axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), - axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + - geom_text(aes(label = count, x = name, y = count), position = position_dodge(width = 0.8), vjust = -0.6) + geom_bar(stat="identity",aes(colour=name,fill=name), position = "dodge", width = 0.8, alpha = 0.3) + + xlab("") + + ylab("Number of peaks") + + ggtitle("Peak 
count") + + theme(legend.position="none", + panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.text.y = element_text(colour="black"), + axis.text.x= element_text(colour="black",face="bold"), + axis.line.x = element_line(size = 1, colour = "black", linetype = "solid"), + axis.line.y = element_line(size = 1, colour = "black", linetype = "solid")) + + geom_text(aes(label = count, x = name, y = count), position = position_dodge(width = 0.8), vjust = -0.6) print(plot) ## VIOLIN PLOTS diff --git a/bin/scrape_software_versions.py b/bin/scrape_software_versions.py deleted file mode 100755 index 6f61aa86b..000000000 --- a/bin/scrape_software_versions.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python -from __future__ import print_function -from collections import OrderedDict -import re - -regexes = { - 'nf-core/chipseq': ['v_pipeline.txt', r"(\S+)"], - 'Nextflow': ['v_nextflow.txt', r"(\S+)"], - 'FastQC': ['v_fastqc.txt', r"FastQC v(\S+)"], - 'Trim Galore!': ['v_trim_galore.txt', r"version (\S+)"], - 'BWA': ['v_bwa.txt', r"Version: (\S+)"], - 'Samtools': ['v_samtools.txt', r"samtools (\S+)"], - 'BEDTools': ['v_bedtools.txt', r"bedtools v(\S+)"], - 'BamTools': ['v_bamtools.txt', r"bamtools (\S+)"], - 'deepTools': ['v_deeptools.txt', r"plotFingerprint (\S+)"], - 'Picard': ['v_picard.txt', r"\n(\S+)"], - 'R': ['v_R.txt', r"R version (\S+)"], - 'Pysam': ['v_pysam.txt', r"(\S+)"], - 'MACS2': ['v_macs2.txt', r"macs2 (\S+)"], - 'HOMER': ['v_homer.txt', r"(\S+)"], - 'featureCounts': ['v_featurecounts.txt', r"featureCounts v(\S+)"], - 'Preseq': ['v_preseq.txt', r"Version: (\S+)"], - 'MultiQC': ['v_multiqc.txt', r"multiqc, version (\S+)"], -} - -results = OrderedDict() -results['nf-core/chipseq'] = 'N/A' -results['Nextflow'] = 'N/A' -results['FastQC'] = 'N/A' -results['Trim Galore!'] = 'N/A' -results['BWA'] = 'N/A' -results['Samtools'] = 'N/A' -results['BEDTools'] = 'N/A' -results['BamTools'] = 'N/A' 
-results['deepTools'] = 'N/A' -results['Picard'] = 'N/A' -results['R'] = 'N/A' -results['Pysam'] = 'N/A' -results['MACS2'] = 'N/A' -results['HOMER'] = False -results['featureCounts'] = 'N/A' -results['Preseq'] = 'N/A' -results['MultiQC'] = 'N/A' - -# Search each file using its regex -for k, v in regexes.items(): - try: - with open(v[0]) as x: - versions = x.read() - match = re.search(v[1], versions) - if match: - results[k] = "v{}".format(match.group(1)) - except IOError: - results[k] = False - -# Remove software set to false in results -for k in list(results): - if not results[k]: - del(results[k]) - -# Dump to YAML -print (''' -id: 'software_versions' -section_name: 'nf-core/chipseq Software Versions' -section_href: 'https://github.com/nf-core/chipseq' -plot_type: 'html' -description: 'are collected at run time from the software output.' -data: | -
-''') -for k,v in results.items(): - print("
{}
{}
".format(k,v)) -print ("
") - -# Write out regexes as csv file: -with open('software_versions.csv', 'w') as f: - for k,v in results.items(): - f.write("{}\t{}\n".format(k,v)) diff --git a/conf/base.config b/conf/base.config index f6b9dec38..b38d1fe3b 100644 --- a/conf/base.config +++ b/conf/base.config @@ -1,48 +1,63 @@ /* - * ------------------------------------------------- - * nf-core/chipseq Nextflow base config file - * ------------------------------------------------- - * A 'blank slate' config file, appropriate for general - * use on most high performace compute environments. - * Assumes that all software is installed and available - * on the PATH. Runs in `local` mode - all jobs will be - * run on the logged in environment. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/chipseq Nextflow base config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + A 'blank slate' config file, appropriate for general use on most high performance + compute environments. Assumes that all software is installed and available on + the PATH. Runs in `local` mode - all jobs will be run on the logged in environment. +---------------------------------------------------------------------------------------- +*/ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { check_max( 1 * task.attempt, 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } - errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 
'retry' : 'finish' } - maxRetries = 1 - maxErrors = '-1' - - // Process-specific resource requirements - withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } - } - withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } - } - withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } - } - withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } - } - withLabel:error_ignore { - errorStrategy = 'ignore' - } - withName:get_software_versions { - cache = false - } + errorStrategy = { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'finish' } + maxRetries = 1 + maxErrors = '-1' + // Process-specific resource requirements + // NOTE - Please try and re-use the labels below as much as possible. + // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. + // If possible, it would be nice to keep the same label naming convention when + // adding in your local modules too. 
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_single { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_low { + cpus = { check_max( 2 * task.attempt, 'cpus' ) } + memory = { check_max( 12.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + withLabel:process_medium { + cpus = { check_max( 6 * task.attempt, 'cpus' ) } + memory = { check_max( 36.GB * task.attempt, 'memory' ) } + time = { check_max( 8.h * task.attempt, 'time' ) } + } + withLabel:process_high { + cpus = { check_max( 12 * task.attempt, 'cpus' ) } + memory = { check_max( 72.GB * task.attempt, 'memory' ) } + time = { check_max( 16.h * task.attempt, 'time' ) } + } + withLabel:process_long { + time = { check_max( 20.h * task.attempt, 'time' ) } + } + withLabel:process_high_memory { + memory = { check_max( 200.GB * task.attempt, 'memory' ) } + } + withLabel:error_ignore { + errorStrategy = 'ignore' + } + withLabel:error_retry { + errorStrategy = 'retry' + maxRetries = 2 + } + withName:CUSTOM_DUMPSOFTWAREVERSIONS { + cache = false + } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 2de924228..7f282cee1 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -1,420 +1,684 @@ /* - * ------------------------------------------------- - * Nextflow config file for iGenomes paths - * ------------------------------------------------- - * Defines reference genomes, using iGenome paths - * Can be used by any config that customises the base - * path using $params.igenomes_base / --igenomes_base - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for iGenomes paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines reference genomes using iGenome paths. 
+ Can be used by any config that customises the base path using: + $params.igenomes_base / --igenomes_base +---------------------------------------------------------------------------------------- +*/ params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/GRCh37-blacklist.bed" + // illumina iGenomes reference file paths + genomes { + 'GRCh37' { + fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" + mito_name = "MT" + blacklist = "${projectDir}/assets/blacklists/v1.0/GRCh37-blacklist.v1.bed" + 
macs_gsize = [ + "50" : 2684219875, + "75" : 2733035409, + "100" : 2774803719, + "150" : 2824648687, + "200" : 2848794782 + ] + } + 'GRCh38' { + fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" + mito_name = "chrM" + blacklist = "${projectDir}/assets/blacklists/v3.0/hg38-blacklist.v3.bed" + macs_gsize = [ + "50" : 2701262066, + "75" : 2749859687, + "100" : 2805665311, + "150" : 2862089864, + "200" : 2892537351 + ] + } + 'GRCm38' { + fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" + mito_name = "MT" + blacklist = "${projectDir}/assets/blacklists/v2.0/GRCm38-blacklist.v2.bed" + macs_gsize = [ + "50" : 2307679482, + "75" : 2406655830, + "100" : 2466184610, + "150" : 2492306232, + "200" : 2519386924 + ] + } + 'TAIR10' { + fasta = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" + mito_name = "Mt" + macs_gsize = [ + "50" : 114339094, + "75" : 115317469, + "100" : 118459858, + "150" : 118504138, + "200" : 117723393 + ] + } + 'EB2' { + fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" + macs_gsize = [ + "50" : 4150072, + "75" : 4191132, + "100" : 4198752, + "150" : 4176800, + "200" : 4197072 + ] + } + 'UMD3.1' { + fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2370644326, + "75" : 2480511357, + "100" : 2567220492, + "150" : 2594494201, + "200" : 2648740387 + ] + } + 'WBcel235' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" + mito_name = "MtDNA" + macs_gsize = [ + "50" : 95159402, + "75" : 96945370, + "100" : 98259898, + "150" : 98721103, + "200" : 98672558 + ] + } + 'CanFam3.1' { + fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2237684358, + "75" : 2279860111, + "100" : 2293979635, + "150" : 2300527794, + "200" : 2313332891 + ] + } + 'GRCz10' { + fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = [ + "50" : 1172895610, + "75" : 1229400206, + "100" : 1253908756, + "150" : 1285330773, + "200" : 1292538906 + ] + } + 'BDGP6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" + mito_name = "M" + macs_gsize = [ + "50" : 123519388, + "75" : 124886264, + "100" : 126807034, + "150" : 126903604, + "200" : 128575605 + ] + } + 'EquCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2294980416, + "75" : 2289244826, + "100" : 2334155865, + "150" : 2343297042, + "200" : 2350515523 + ] + } + 'EB1' { + fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" + macs_gsize = [ + "50" : 4481912, + "75" : 4485018, + "100" : 4468952, + "150" : 4489684, + "200" : 4527891 + ] + } + 'Galgal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = [ + "50" : 974987959, + "75" : 978772437, + "100" : 984935167, + "150" : 979442039, + "200" : 991678648 + ] + } + 'Gm01' { + fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" + macs_gsize = [ + "50" : 748112428, + "75" : 826455017, + "100" : 857283568, + "150" : 895077451, + "200" : 911783687 + ] + } + 'Mmul_1' { + fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" + bwa = 
"${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2498932238, + "75" : 2598624693, + "100" : 2642166663, + "150" : 2661433343, + "200" : 2674888870 + ] + } + 'IRGSP-1.0' { + fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" + mito_name = "Mt" + macs_gsize = [ + "50" : 322594956, + "75" : 337043804, + "100" : 345775274, + "150" : 355020671, + "200" : 363478234 + ] + } + 'CHIMP2.1.4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" + star = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2576111695, + "75" : 2702821987, + "100" : 2733435831, + "150" : 2735167196, + "200" : 2738912507 + ] + } + 'Rnor_5.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = [ + "50" : 2303951475, + "75" : 2367071843, + "100" : 2402745922, + "150" : 2405692811, + "200" : 2407324495 + ] + } + 'Rnor_6.0' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = [ + "50" : 2375372135, + "75" : 2440746491, + "100" : 2480029900, + "150" : 2477334634, + "200" : 2478552171 + ] + } + 'R64-1-1' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" + mito_name = "MT" + macs_gsize = [ + "50" : 11624332, + "75" : 11693438, + "100" : 11777680, + "150" : 11783749, + "200" : 11825681 + ] + } + 'EF2' { + fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" + readme = 
"${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 12190646, + "75" : 12291456, + "100" : 12346649, + "150" : 12403911, + "200" : 12442064 + ] + } + 'Sbi1' { + fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" + macs_gsize = [ + "50" : 444102512, + "75" : 506986021, + "100" : 540037446, + "150" : 575130820, + "200" : 595857042 + ] + } + 'Sscrofa10.2' { + fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" + mito_name = "MT" + macs_gsize = [ + "50" : 2105185708, + "75" : 2131615607, + "100" : 2149244400, + "150" : 2189757848, + "200" : 
2203893315 + ] + } + 'AGPv3' { + fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" + mito_name = "Mt" + macs_gsize = [ + "50" : 1113453752, + "75" : 1392458449, + "100" : 1579923466, + "150" : 1729475311, + "200" : 1841419596 + ] + } + 'hg38' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" + mito_name = "chrM" + blacklist = "${projectDir}/assets/blacklists/v3.0/hg38-blacklist.v3.bed" + macs_gsize = [ + "50" : 2701262066, + "75" : 2749859687, + "100" : 2805665311, + "150" : 2862089864, + "200" : 2892537351 + ] + } + 'hg19' { + fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" + mito_name = "chrM" + blacklist = "${projectDir}/assets/blacklists/v1.0/hg19-blacklist.v1.bed" + macs_gsize = [ + "50" : 2684219875, + "75" : 2733035409, + "100" : 2774803719, + "150" : 2824648687, + "200" : 2848794782 + ] + } + 'mm10' { + fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" + mito_name = "chrM" + blacklist = "${projectDir}/assets/blacklists/v2.0/mm10-blacklist.v2.bed" + macs_gsize = [ + "50" : 2307679482, + "75" : 2406655830, + "100" : 2466184610, + "150" : 2492306232, + "200" : 2519386924 + ] + } + 'bosTau8' { + fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" + gtf = 
"${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = [ + "50" : 2370644326, + "75" : 2480511357, + "100" : 2567220492, + "150" : 2594494201, + "200" : 2648740387 + ] + } + 'ce10' { + fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : 95156190, + "75" : 96995949, + "100" : 98287299, + "150" : 98879728, + "200" : 98769409 + ] + } + 'canFam3' { + fasta = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" + mito_name = 
"chrM" + macs_gsize = [ + "50" : 2237684358, + "75" : 2279860111, + "100" : 2293979635, + "150" : 2300527794, + "200" : 2313332891 + ] + } + 'danRer10' { + fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = [ + "50" : 1172895610, + "75" : 1229400206, + "100" : 1253908756, + "150" : 1285330773, + "200" : 1292538906 + ] + } + 'dm6' { + fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = [ + "50" : 123548253, + "75" : 124886264, + "100" : 126807034, + "150" : 126908682, + "200" : 128599061 + ] + } + 'equCab2' { + fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" + bowtie2 = 
"${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : 2294980416, + "75" : 2289244826, + "100" : 2334155865, + "150" : 2343297042, + "200" : 2350515523 + ] + } + 'galGal4' { + fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : 974987959, + "75" : 978772437, + "100" : 984935167, + "150" : 979442039, + "200" : 991678648 + ] + } + 'panTro4' { + fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" + bismark = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : 2576111695, + "75" : 2702821987, + "100" : 2733435831, + "150" : 2735167196, + "200" : 2738912507 + ] + } + 'rn6' { + fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" + mito_name = "chrM" + macs_gsize = [ + "50" : 2375372135, + "75" : 2440746491, + "100" : 2480029900, + "150" : 2477334634, + "200" : 2478552171 + ] + } + 'sacCer3' { + fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" + readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : "11624332", + "75" : "11693438", + "100" : 
"11777680", + "150" : "11783749", + "200" : "11825681" + ] + } + 'susScr3' { + fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" + bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" + bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" + star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" + bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" + gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" + bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" + readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" + mito_name = "chrM" + macs_gsize = [ + "50" : 2105185708, + "75" : 2131615607, + "100" : 2149244400, + "150" : 2189757848, + "200" : 2203893315 + ] + } } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } 
- 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - 
bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${baseDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${baseDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/genome.fa" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/genome.fa" - bowtie2 = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } } diff --git a/conf/modules.config b/conf/modules.config new file mode 100644 index 000000000..b46fec836 --- /dev/null +++ b/conf/modules.config @@ -0,0 +1,739 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// General configuration options +// + +process { + publishDir = [ + path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:INPUT_CHECK:SAMPLESHEET_CHECK' { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: CUSTOM_DUMPSOFTWAREVERSIONS { + publishDir = [ + path: { "${params.outdir}/pipeline_info" }, + mode: params.publish_dir_mode, + pattern: '*_versions.yml' + ] + } +} + +// +// Genome preparation options +// + +process { + withName: 'GUNZIP_.*' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'UNTAR_.*' { + ext.args2 = '--no-same-owner' + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'BWA_INDEX|BOWTIE2_BUILD|STAR_GENOMEGENERATE' { + publishDir = [ + path: { "${params.outdir}/genome/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'UNTAR_CHROMAP_INDEX|CHROMAP_INDEX' { + publishDir = [ + path: { "${params.outdir}/genome/index/chromap" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'GFFREAD' { + ext.args = '--keep-exon-attrs -F -T' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'GTF2BED' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'GENOME_BLACKLIST_REGIONS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +// +// Read QC and trimming options +// + +if (!(params.skip_fastqc || params.skip_qc)) { + process { + withName: '.*:FASTQC_TRIMGALORE:FASTQC' { + ext.args = '--quiet' + publishDir = [ + [ + path: { "${params.outdir}/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html}" + ], + [ + path: { "${params.outdir}/fastqc/zips" }, + mode: params.publish_dir_mode, + pattern: "*.{zip}" + ] + ] + } + } +} + +if (!params.skip_trimming) { + process { + withName: '.*:FASTQC_TRIMGALORE:TRIMGALORE' { + ext.args = [ + '--fastqc', + params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/trimgalore/fastqc" }, + mode: params.publish_dir_mode, + pattern: "*.{html}" + ], + [ + path: { "${params.outdir}/trimgalore/fastqc/zips" }, + mode: params.publish_dir_mode, + pattern: "*.{zip}" + ], + [ + path: { "${params.outdir}/trimgalore/logs" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ], + [ + path: { "${params.outdir}/trimgalore" }, + mode: params.publish_dir_mode, + pattern: "*.fq.gz", + enabled: params.save_trimmed + ] + ] + } + } +} + +process { + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.Lb.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_align_intermeds + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_align_intermeds + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:ALIGN_.*:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.Lb.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library/samtools_stats/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_align_intermeds + ] + } +} + +if (params.aligner == 'bwa') { + process { + withName: 'BWA_MEM' { + ext.args = { [ + '-M', + params.bwa_min_score ? " -T ${params.bwa_min_score}" : '', + meta.read_group ? "-R ${meta.read_group}": '' + ].join(' ').trim() } + ext.args2 = '-bhS -F 0x0100 -O BAM' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + } +} + +if (params.aligner == 'bowtie2') { + process { + withName: 'BOWTIE2_ALIGN' { + ext.args = '' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: false + ], + [ + path: { "${params.outdir}/${params.aligner}/library/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } + } +} + +if (params.aligner == 'chromap') { + process { + withName: CHROMAP_INDEX { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/genome/${params.aligner}/index" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: CHROMAP_CHROMAP { + ext.args = '-l 2000 --low-mem --SAM' + ext.prefix = { "${meta.id}.Lb" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: false + ] + } + } +} + +if (params.aligner == 'star') { + process { + withName: '.*:ALIGN_STAR:STAR_ALIGN' { + ext.args = [ + '--runMode alignReads', + '--alignIntronMax 1', + '--alignEndsType EndToEnd', + '--outSAMtype BAM Unsorted', + '--readFilesCommand zcat', + '--runRNGseed 0', + '--outSAMattributes NH HI AS NM MD', + params.save_unaligned ? 
'--outReadsUnmapped Fastx' : '' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/library/log" }, + mode: params.publish_dir_mode, + pattern: '*.{out,tab}' + ], + [ + path: { "${params.outdir}/${params.aligner}/library" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: false + ], + [ + path: { "${params.outdir}/${params.aligner}/library/unmapped" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_unaligned + ] + ] + } + } +} + +process { + withName: 'PICARD_MERGESAMFILES' { + ext.args = '--SORT_ORDER coordinate --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.sorted" } + publishDir = [ enabled: false ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:PICARD_MARKDUPLICATES' { + ext.args = '--ASSUME_SORTED true --REMOVE_DUPLICATES false --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.mkD.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: '*.metrics.txt' + ], + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.bam', + enabled: params.save_align_intermeds + ] + ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}', + enabled: params.save_align_intermeds + ] + } + + withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.mkD.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + + // Should only be published when paired end data is used and save_align_intermeds is true + withName: 'BAM_FILTER' { + ext.prefix = { meta.single_end ? 
"${meta.id}.mLb.noPublish" : "${meta.id}.mLb.flT.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.mLb.flT.sorted.bam', + enabled: params.save_align_intermeds + ] + } + + withName: 'BAM_REMOVE_ORPHANS' { + ext.args = '--only_fr_pairs' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ enabled: false ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:SAMTOOLS_INDEX' { + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary" }, + mode: params.publish_dir_mode, + pattern: '*.{bai,csi}' + ] + } + + withName: 'NFCORE_CHIPSEQ:CHIPSEQ:FILTER_BAM_BAMTOOLS:BAM_SORT_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mLb.clN.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: 'PHANTOMPEAKQUALTOOLS' { + ext.args2 = { "-p=$task.cpus" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + mode: params.publish_dir_mode, + pattern: "*.{out,pdf}" + ] + } + + withName: 'MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS' { + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/phantompeakqualtools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'BEDTOOLS_GENOMECOV' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig/scale" }, + mode: params.publish_dir_mode, + pattern: "*.txt" + ] + } + + withName: 'UCSC_BEDGRAPHTOBIGWIG' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/bigwig" }, + mode: params.publish_dir_mode, + pattern: "*.bigWig" + ] + } +} + +if (!params.skip_picard_metrics) { + process { + withName: 'PICARD_COLLECTMULTIPLEMETRICS' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + ext.prefix = { "${meta.id}.mLb.clN.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: "*_metrics" + ], + [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/picard_metrics/pdf" }, + mode: params.publish_dir_mode, + pattern: "*.pdf" + ] + ] + } + } +} + +if (!params.skip_preseq) { + process { + withName: 'PRESEQ_LCEXTRAP' { + ext.args = '-verbose -bam -seed 1' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/preseq" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_plot_profile) { + process { + withName: 'DEEPTOOLS_COMPUTEMATRIX' { + ext.args = 'scale-regions --regionBodyLength 1000 --beforeRegionStartLength 3000 --afterRegionStartLength 3000 --skipZeros --smartLabels' + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'DEEPTOOLS_PLOTPROFILE' { + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'DEEPTOOLS_PLOTHEATMAP' { + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotProfile" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +process { + withName: 'KHMER_UNIQUEKMERS' { + publishDir = [ enabled: false ] + } +} + +if (!params.skip_plot_fingerprint) { + process { + withName: 'DEEPTOOLS_PLOTFINGERPRINT' { + ext.args = { [ + '--skipZeros', + "--numberOfSamples $params.fingerprint_bins", + "--labels $meta.id $meta.control" + ].join(' ').trim() } + ext.prefix = { "${meta.id}.mLb.clN" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mergedLibrary/deepTools/plotFingerprint" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +process { + withName: 'MACS2_CALLPEAK' { + ext.args = [ + '--keep-dup all', + params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}", + params.save_macs_pileup ? '--bdg --SPMR' : '', + params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '', + params.macs_pvalue ? "--pvalue ${params.macs_pvalue}" : '', + params.aligner == "chromap" ? "--format BAM" : '' + ].join(' ').trim() + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: 'FRIP_SCORE' { + ext.args = '-bed -c -f 0.20' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + enabled: false + ] + } + + withName: 'MULTIQC_CUSTOM_PEAKS' { + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +if (!params.skip_peak_annotation) { + process { + withName: 'HOMER_ANNOTATEPEAKS_MACS2' { + ext.args = '-gid' + ext.prefix = { "${meta.id}_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_qc) { + process { + withName: 'PLOT_MACS2_QC' { + ext.args = '-o ./ -p macs2_peak' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'PLOT_HOMER_ANNOTATEPEAKS' { + ext.args = '-o ./' + ext.prefix = 'macs2_annotatePeaks' + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/qc' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } +} + +if (!params.skip_consensus_peaks) { + process { + withName: 'MACS2_CONSENSUS' { + ext.when = { meta.multiple_groups || meta.replicates_exist } + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + "/${meta.id}" + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SUBREAD_FEATURECOUNTS' { + ext.args = '-F SAF -O --fracOverlap 0.2' + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + "/${meta.id}" + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_peak_annotation) { + process { + withName: 'HOMER_ANNOTATEPEAKS_CONSENSUS' { + ext.args = '-gid' + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + "/${meta.id}" + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'ANNOTATE_BOOLEAN_PEAKS' { + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + "/${meta.id}" + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_deseq2_qc) { + process { + withName: DESEQ2_QC { + ext.when = { meta.multiple_groups && meta.replicates_exist } + ext.args = [ + '--id_col 1', + '--sample_suffix \'.mLb.clN.sorted.bam\'', + '--count_col 7', + params.deseq2_vst ? '--vst TRUE' : '' + ].join(' ').trim() + ext.prefix = { "${meta.id}.consensus_peaks" } + publishDir = [ + path: { [ + "${params.outdir}/${params.aligner}/mergedLibrary/macs2", + params.narrow_peak? '/narrowPeak' : '/broadPeak', + '/consensus', + "/${meta.id}", + '/deseq2' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_igv) { + process { + withName: 'IGV' { + publishDir = [ + path: { [ + "${params.outdir}/igv", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_multiqc) { + process { + withName: 'MULTIQC' { + ext.args = params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + publishDir = [ + path: { [ + "${params.outdir}/multiqc", + params.narrow_peak? '/narrowPeak' : '/broadPeak' + ].join('') }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} diff --git a/conf/test.config b/conf/test.config index e3b4016c2..9b24bc9af 100644 --- a/conf/test.config +++ b/conf/test.config @@ -1,31 +1,35 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a fast and simple test. 
Use as follows: - * nextflow run nf-core/chipseq -profile test, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/chipseq -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Test profile' - config_profile_description = 'Minimal test dataset to check pipeline function' + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' - // Limit resources so that this can run on GitHub Actions - max_cpus = 2 - max_memory = 6.GB - max_time = 12.h + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design.csv' + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_test.csv' + read_length = 50 - // Genome references - fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' - gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' + // Genome references + fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genome.fa' + gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/atacseq/reference/genes.gtf' - // Not mandatory but permits the pipeline to run through peak-calling steps - macs_gsize = 1.2e7 + // For speed to avoid CI time-out + fingerprint_bins = 100 - // For speed to avoid CI time-out - fingerprint_bins = 100 + // Avoid preseq errors with test data + skip_preseq = true } diff 
--git a/conf/test_full.config b/conf/test_full.config old mode 100755 new mode 100644 index 3875f0297..d25c37d79 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -1,20 +1,25 @@ /* - * ------------------------------------------------- - * Nextflow config file for running tests - * ------------------------------------------------- - * Defines bundled input files and everything required - * to run a full pipeline test. Use as follows: - * nextflow run nf-core/chipseq -profile test_full, - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. + + Use as follows: + nextflow run nf-core/chipseq -profile test_full, --outdir + +---------------------------------------------------------------------------------------- +*/ params { - config_profile_name = 'Full test profile' - config_profile_description = 'Full test dataset to check pipeline function' + config_profile_name = 'Full test profile' + config_profile_description = 'Full test dataset to check pipeline function' + + // Input data for full size test + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/samplesheet/v2.0/samplesheet_full.csv' - // Input data - input = 'https://raw.githubusercontent.com/nf-core/test-datasets/chipseq/design_full.csv' - single_end = true + // Used to calculate --macs_gsize + read_length = 50 - // Genome references - genome = 'hg19' + // Genome references + genome = 'hg19' } diff --git a/docs/README.md b/docs/README.md index 80d36de34..d3849f4fe 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,12 +1,10 @@ # nf-core/chipseq: Documentation -The nf-core/chipseq documentation is split into the following files: +The nf-core/chipseq documentation is split into the following pages: -1. 
[Installation](https://nf-co.re/usage/installation) -2. Pipeline configuration - * [Local installation](https://nf-co.re/usage/local_installation) - * [Adding your own system config](https://nf-co.re/usage/adding_own_config) - * [Reference genomes](https://nf-co.re/usage/reference_genomes) -3. [Running the pipeline](usage.md) -4. [Output and how to interpret the results](output.md) -5. [Troubleshooting](https://nf-co.re/usage/troubleshooting) +- [Usage](usage.md) + - An overview of how the pipeline works, how to run it and a description of all of the different command-line flags. +- [Output](output.md) + - An overview of the different results produced by the pipeline and how to interpret them. + +You can find a lot more documentation about installing, configuring and running nf-core pipelines on the website: [https://nf-co.re](https://nf-co.re) diff --git a/docs/images/mqc_fastqc_adapter.png b/docs/images/mqc_fastqc_adapter.png new file mode 100755 index 000000000..361d0e47a Binary files /dev/null and b/docs/images/mqc_fastqc_adapter.png differ diff --git a/docs/images/mqc_fastqc_counts.png b/docs/images/mqc_fastqc_counts.png new file mode 100755 index 000000000..cb39ebb80 Binary files /dev/null and b/docs/images/mqc_fastqc_counts.png differ diff --git a/docs/images/mqc_fastqc_quality.png b/docs/images/mqc_fastqc_quality.png new file mode 100755 index 000000000..a4b89bf56 Binary files /dev/null and b/docs/images/mqc_fastqc_quality.png differ diff --git a/docs/images/nf-core-chipseq_logo.png b/docs/images/nf-core-chipseq_logo.png deleted file mode 100644 index e7fefa503..000000000 Binary files a/docs/images/nf-core-chipseq_logo.png and /dev/null differ diff --git a/docs/images/nf-core-chipseq_logo_dark.png b/docs/images/nf-core-chipseq_logo_dark.png new file mode 100644 index 000000000..9f2b30100 Binary files /dev/null and b/docs/images/nf-core-chipseq_logo_dark.png differ diff --git a/docs/images/nf-core-chipseq_logo_light.png 
b/docs/images/nf-core-chipseq_logo_light.png new file mode 100644 index 000000000..51a83b3d7 Binary files /dev/null and b/docs/images/nf-core-chipseq_logo_light.png differ diff --git a/docs/images/r_deseq2_ma_plot.png b/docs/images/r_deseq2_ma_plot.png deleted file mode 100755 index 67c598fc7..000000000 Binary files a/docs/images/r_deseq2_ma_plot.png and /dev/null differ diff --git a/docs/images/r_deseq2_volcano_plot.png b/docs/images/r_deseq2_volcano_plot.png deleted file mode 100755 index 679746b47..000000000 Binary files a/docs/images/r_deseq2_volcano_plot.png and /dev/null differ diff --git a/docs/output.md b/docs/output.md index 7a2c654ce..b31985543 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,6 +1,14 @@ -# ![nf-core/chipseq](images/nf-core-chipseq_logo.png) +# nf-core/chipseq: Output -This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. +## Introduction + +This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report generated from the [full-sized test dataset](https://github.com/nf-core/test-datasets/tree/chipseq#full-test-dataset-origin) for the pipeline using a command similar to the one below: + +```console +nextflow run nf-core/chipseq -profile test_full, +``` + +The directories listed below will be created in the output directory after the pipeline has finished. All paths are relative to the top-level results directory. ## Pipeline overview @@ -8,8 +16,6 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/). See [`main REA See [Illumina website](https://emea.illumina.com/techniques/sequencing/dna-sequencing/chip-seq.html) for more information regarding the ChIP-seq protocol, and for an extensive list of publications. -The directories listed below will be created in the output directory after the pipeline has finished. 
All paths are relative to the top-level results directory. - ## Library-level analysis The initial QC and alignments are performed at the library-level e.g. if the same library has been sequenced more than once to increase sequencing depth. This has the advantage of being able to assess each library individually, and the ability to process multiple libraries from the same sample in parallel. @@ -19,28 +25,28 @@ The initial QC and alignments are performed at the library-level e.g. if the sam
Output files -* `fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics for read 1 (*and read2 if paired-end*) **before** adapter trimming. -* `fastqc/zips/` - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read2 if paired-end_) **before** adapter trimming. +- `fastqc/zips/` + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
-[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). You get information about adapter contamination and other overrepresented sequences. +[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). ### Adapter trimming
Output files -* `trim_galore/` - * `*fastq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. -* `trim_galore/logs/` - * `*.log`: Log file generated by Trim Galore!. -* `trim_galore/fastqc/` - * `*_fastqc.html`: FastQC report containing quality metrics for read 1 (*and read2 if paired-end*) **after** adapter trimming. -* `trim_galore/fastqc/zips/` - * `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `trimgalore/` + - `*.fq.gz`: If `--save_trimmed` is specified, FastQ files **after** adapter trimming will be placed in this directory. +- `trimgalore/logs/` + - `*.txt`: Log file generated by Trim Galore!. +- `trimgalore/fastqc/` + - `*_fastqc.html`: FastQC report containing quality metrics for read 1 (_and read2 if paired-end_) **after** adapter trimming. +- `trimgalore/fastqc/zips/` + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images.
@@ -50,22 +56,49 @@ The initial QC and alignments are performed at the library-level e.g. if the sam ### Alignment +The pipeline has been written in a way where all the files generated downstream of the alignment are placed in the same directory as specified by `--aligner` e.g. if `--aligner bwa` is specified then all the downstream results will be placed in the `bwa/` directory. This helps with organising the directory structure and more importantly, allows the end-user to get the results from multiple aligners by simply re-running the pipeline with a different `--aligner` option along with the `-resume` parameter. It also means that results won't be overwritten when resuming the pipeline and can be used for benchmarking between alignment algorithms if required. Thus, `<ALIGNER>` in the directory structure below corresponds to the aligner set when running the pipeline. +
Output files -* `bwa/library/` - * `*.bam`: The files resulting from the alignment of individual libraries are not saved by default so this directory will not be present in your results. You can override this behaviour with the use of the `--save_align_intermeds` flag in which case it will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. -* `bwa/library/samtools_stats/` - * SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. +- `/library/` + - `*.bam`: The files resulting from the alignment of individual libraries are not saved by default so this directory will not be present in your results. You can override this behaviour with the use of the `--save_align_intermeds` flag in which case it will contain the coordinate sorted alignment files in [`*.bam`](https://samtools.github.io/hts-specs/SAMv1.pdf) format. +- `/library/samtools_stats/` + - SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. -> **NB:** File names in the resulting directory (i.e. `bwa/library/`) will have the '`.Lb.`' suffix. +> **NB:** File names in the resulting directory (i.e. `/library/`) will have the '`.Lb.`' suffix.
-Adapter-trimmed reads are mapped to the reference assembly using [BWA](http://bio-bwa.sourceforge.net/bwa.shtml). A genome index is required to run BWA so if this is not provided explicitly using the `--bwa_index` parameter then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--save_reference` parameter to save the indices for future pipeline runs, reducing processing times. +Adapter-trimmed reads are mapped to the reference assembly using the aligner set by the `--aligner` parameter. Available aligners are [BWA](http://bio-bwa.sourceforge.net/bwa.shtml) (default), [Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml), [Chromap](https://github.com/haowenz/chromap) and [STAR](https://github.com/alexdobin/STAR). A genome index is required to run any of these aligners so if this is not provided explicitly using the corresponding parameter (e.g. `--bwa_index`), then it will be created automatically from the genome fasta input. The index creation process can take a while for larger genomes so it is possible to use the `--save_reference` parameter to save the indices for future pipeline runs, reducing processing times. ![MultiQC - SAMtools stats plot](images/mqc_samtools_stats_plot.png) +> **NB:** Currently, paired-end files produced by `Chromap` are excluded from downstream analysis due to [this](https://github.com/nf-core/chipseq/issues/291) issue. Single-end files are processed normally. + +#### Unmapped reads + +The `--save_unaligned` parameter enables you to obtain FastQ files containing unmapped reads (only available for STAR and Bowtie2). + +
+ Output files +- `/library/unmapped/` + - `*.fastq.gz`: If `--save_unaligned` is specified, FastQ files containing unmapped reads will be placed in this directory. + +
+ +#### STAR logs + +
+ Output files + +- `star/library/log/` + - `*.SJ.out.tab`: File containing filtered splice junctions detected after mapping the reads. + - `*.Log.final.out`: STAR alignment report containing the mapping results summary. + - `*.Log.out` and `*.Log.progress.out`: STAR log files containing detailed information about the run. Typically only useful for debugging purposes. + +
+ ## Merged library-level analysis The library-level alignments associated with the same sample are merged and subsequently used for the downstream analyses. @@ -75,23 +108,23 @@ The library-level alignments associated with the same sample are merged and subs
Output files -* `bwa/mergedLibrary/` - * `*.bam`: Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. If you specify the `--save_align_intermeds` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). -* `bwa/mergedLibrary/samtools_stats/` - * SAMtools `*.flagstat`, `*.idxstats` and `*.stats` files generated from the alignment files. -* `bwa/mergedLibrary/picard_metrics/` - * `*_metrics`: Alignment QC files from picard CollectMultipleMetrics. - * `*.metrics.txt`: Metrics file from MarkDuplicates. -* `bwa/mergedLibrary/picard_metrics/pdf/` - * `*.pdf`: Alignment QC plot files from picard CollectMultipleMetrics. -* `bwa/mergedLibrary/preseq/` - * `*.ccurve.txt`: Preseq expected future yield file. +- `/mergedLibrary/` + - `*.bam`: Merged library-level, coordinate sorted `*.bam` files after the marking of duplicates, and filtering based on various criteria. The file suffix for the final filtered files will be `*.mLb.clN.*`. If you specify the `--save_align_intermeds` parameter then two additional sets of files will be present. These represent the unfiltered alignments with duplicates marked (`*.mLb.mkD.*`), and in the case of paired-end datasets the filtered alignments before the removal of orphan read pairs (`*.mLb.flT.*`). +- `/mergedLibrary/samtools_stats/` + - SAMtools `*.flagstat`, `*.idxstats` and `*.stats` files generated from the alignment files. +- `/mergedLibrary/picard_metrics/` + - `*_metrics`: Alignment QC files from picard CollectMultipleMetrics. + - `*.metrics.txt`: Metrics file from MarkDuplicates. 
+- `/mergedLibrary/picard_metrics/pdf/` + - `*.pdf`: Alignment QC plot files from picard CollectMultipleMetrics. +- `/mergedLibrary/preseq/` + - `*.lc_extrap.txt`: Preseq expected future yield file. -> **NB:** File names in the resulting directory (i.e. `bwa/mergedLibrary/`) will have the '`.mLb.`' suffix. +> **NB:** File names in the resulting directory (i.e. `/mergedLibrary/`) will have the '`.mLb.`' suffix.
-[Picard MergeSamFiles and MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html) are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step isnt carried out because the library-level and merged library-level BAM files will be exactly the same. +[Picard MergeSamFiles and MarkDuplicates](https://broadinstitute.github.io/picard/command-line-overview.html) are used in combination to merge the alignments, and for the marking of duplicates, respectively. If you only have one library for any given replicate then the merging step is not carried out because the library-level and merged library-level BAM files will be exactly the same. ![MultiQC - Picard deduplication stats plot](images/mqc_picard_deduplication_plot.png) @@ -108,8 +141,8 @@ The [Preseq](http://smithlabresearch.org/software/preseq/) package is aimed at p
Output files -* `bwa/mergedLibrary/bigwig/` - * `*.bigWig`: Normalised bigWig files scaled to 1 million mapped reads. +- `/mergedLibrary/bigwig/` + - `*.bigWig`: Normalised bigWig files scaled to 1 million mapped reads.
@@ -120,13 +153,13 @@ The [bigWig](https://genome.ucsc.edu/goldenpath/help/bigWig.html) format is in a
Output files -* `bwa/mergedLibrary/phantompeakqualtools/` - * `*.spp.out`, `*.spp.pdf`: phantompeakqualtools output files. - * `*_mqc.tsv`: MultiQC custom content files. -* `bwa/mergedLibrary/deepTools/plotFingerprint/` - * `*.plotFingerprint.pdf`, `*.plotFingerprint.qcmetrics.txt`, `*.plotFingerprint.raw.txt`: plotFingerprint output files. -* `bwa/mergedLibrary/deepTools/plotProfile/` - * `*.computeMatrix.mat.gz`, `*.computeMatrix.vals.mat.tab`, `*.plotProfile.pdf`, `*.plotProfile.tab`, `*.plotHeatmap.pdf`, `*.plotHeatmap.mat.tab`: plotProfile output files. +- `/mergedLibrary/phantompeakqualtools/` + - `*.spp.out`, `*.spp.pdf`: phantompeakqualtools output files. + - `*_mqc.tsv`: MultiQC custom content files. +- `/mergedLibrary/deepTools/plotFingerprint/` + - `*.plotFingerprint.pdf`, `*.plotFingerprint.qcmetrics.txt`, `*.plotFingerprint.raw.txt`: plotFingerprint output files. +- `/mergedLibrary/deepTools/plotProfile/` + - `*.computeMatrix.mat.gz`, `*.computeMatrix.vals.mat.tab`, `*.plotProfile.pdf`, `*.plotProfile.tab`, `*.plotHeatmap.pdf`, `*.plotHeatmap.mat.tab`: plotProfile output files.
@@ -155,23 +188,23 @@ The results from deepTools plotProfile gives you a quick visualisation for the g
Output files -* `bwa/mergedLibrary/macs//` - * `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in *narrowPeak* or *broadPeak* mode. - * `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file. -* `bwa/mergedLibrary/macs//qc/` - * `macs_peak.plots.pdf`: QC plots for MACS2 peaks. - * `macs_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. - * `*.FRiP_mqc.tsv`, `*.count_mqc.tsv`, `macs_annotatePeaks.summary_mqc.tsv`: MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios. +- `/mergedLibrary/macs2//` + - `*.xls`, `*.broadPeak` or `*.narrowPeak`, `*.gappedPeak`, `*summits.bed`: MACS2 output files - the files generated will depend on whether MACS2 has been run in _narrowPeak_ or _broadPeak_ mode. + - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file. +- `/mergedLibrary/macs2//qc/` + - `macs2_peak.plots.pdf`: QC plots for MACS2 peaks. + - `macs2_annotatePeaks.plots.pdf`: QC plots for peak-to-gene feature annotation. + - `*.FRiP_mqc.tsv`, `*.peak_count_mqc.tsv`, `annotatepeaks.summary_mqc.tsv`: MultiQC custom-content files for FRiP score, peak count and peak-to-gene ratios. > **NB:** `` in the directory structure above corresponds to the type of peak that you have specified to call with MACS2 i.e. `broadPeak` or `narrowPeak`. If you so wish, you can call both narrow and broad peaks without redoing the preceding steps in the pipeline such as the alignment and filtering. For example, if you already have broad peaks then just add `--narrow_peak -resume` to the command you used to run the pipeline, and these will be called too! However, resuming the pipeline will only be possible if you have not deleted the `work/` directory generated by the pipeline.
-[MACS2](https://github.com/taoliu/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/taoliu/MACS#output-files) for a description of the output files generated by MACS2. +[MACS2](https://github.com/macs3-project/MACS) is one of the most popular peak-calling algorithms for ChIP-seq data. By default, the peaks are called with the MACS2 `--broad` parameter. If, however, you would like to call narrow peaks then please provide the `--narrow_peak` parameter when running the pipeline. See [MACS2 outputs](https://github.com/macs3-project/MACS/blob/master/docs/callpeak.md#output-files) for a description of the output files generated by MACS2. ![MultiQC - MACS2 total peak count plot](images/mqc_macs2_peak_count_plot.png) -[HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation is not provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. *Annotation*, *Distance to TSS* and *Nearest Promoter ID*. +[HOMER annotatePeaks.pl](http://homer.ucsd.edu/homer/ngs/annotation.html) is used to annotate the peaks relative to known genomic features. HOMER is able to use the `--gtf` annotation file which is provided to the pipeline. Please note that some of the output columns will be blank because the annotation is not provided using HOMER's in-built database format. However, the more important fields required for downstream analysis will be populated i.e. 
_Annotation_, _Distance to TSS_ and _Nearest Promoter ID_. ![MultiQC - HOMER annotatePeaks peak-to-gene feature ratio plot](images/mqc_annotatePeaks_feature_percentage_plot.png) @@ -184,20 +217,20 @@ Various QC plots per sample including number of peaks, fold-change distribution,
Output files -* `bwa/mergedLibrary/macs//consensus/` - * `*.bed`: Consensus peak-set across all samples in BED format. - * `*.saf`: Consensus peak-set across all samples in SAF format. Required by featureCounts for read quantification. - * `*.featureCounts.txt`: Read counts across all samples relative to consensus peak-set. - * `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file for consensus peaks. - * `*.boolean.annotatePeaks.txt`: Spreadsheet representation of consensus peak-set across samples **with** gene annotation columns. The columns from individual peak files are included in this file along with the ability to filter peaks based on their presence or absence in multiple replicates/conditions. - * `*.boolean.txt`: Spreadsheet representation of consensus peak-set across samples **without** gene annotation columns. Same as file above but without annotation columns. - * `*.boolean.intersect.plot.pdf`, `*.boolean.intersect.txt`: [UpSetR](https://cran.r-project.org/web/packages/UpSetR/README.html) files to illustrate peak intersection. +- `/mergedLibrary/macs2//consensus//` + - `*.bed`: Consensus peak-set across all samples in BED format. + - `*.saf`: Consensus peak-set across all samples in SAF format. Required by featureCounts for read quantification. + - `*.featureCounts.txt`: Read counts across all samples relative to consensus peak-set. + - `*.annotatePeaks.txt`: HOMER peak-to-gene annotation file for consensus peaks. + - `*.boolean.annotatePeaks.txt`: Spreadsheet representation of consensus peak-set across samples **with** gene annotation columns. The columns from individual peak files are included in this file along with the ability to filter peaks based on their presence or absence in multiple replicates/conditions. + - `*.boolean.txt`: Spreadsheet representation of consensus peak-set across samples **without** gene annotation columns. Same as file above but without annotation columns. 
+ - `*.boolean.intersect.plot.pdf`, `*.boolean.intersect.txt`: [UpSetR](https://cran.r-project.org/web/packages/UpSetR/README.html) files to illustrate peak intersection.
In order to perform the differential binding analysis we need to be able to carry out the read quantification for the same intervals across **all** of the samples in the experiment. To this end, the individual peak-sets called per sample have to be merged together in order to create a consensus set of peaks. -Using the consensus peaks it is possible to assess the degree of overlap between the peaks from a set of samples e.g. *Which consensus peaks contain peaks that are common/unique to a given set of samples?*. This may be useful for downstream filtering of peaks based on whether they are called in multiple replicates/conditions. Please note that it is possible for a consensus peak to contain multiple peaks from the same sample. Unfortunately, this is sample-dependent but the files generated by the pipeline do have columns that report such instances and allow you to factor them into any further analysis. +Using the consensus peaks it is possible to assess the degree of overlap between the peaks from a set of samples e.g. _Which consensus peaks contain peaks that are common/unique to a given set of samples?_. This may be useful for downstream filtering of peaks based on whether they are called in multiple replicates/conditions. Please note that it is possible for a consensus peak to contain multiple peaks from the same sample. Unfortunately, this is sample-dependent but the files generated by the pipeline do have columns that report such instances and allow you to factor them into any further analysis. ![R - UpSetR peak intersection plot](images/r_upsetr_intersect_plot.png) @@ -212,40 +245,30 @@ The [featureCounts](http://bioinf.wehi.edu.au/featureCounts/) tool is used to co
Output files -* `bwa/mergedLibrary/macs//consensus//deseq2/` - * `*.results.txt`: Spreadsheet containing differential binding results across all consensus peaks and all comparisons. - * `*.plots.pdf`: File containing PCA and hierarchical clustering plots. - * `*.log`: Log file with information for number of differentially bound intervals at different FDR and fold-change thresholds for each comparison. - * `*.dds.rld.RData`: File containing R `dds` and `rld` objects generated by DESeq2. - * `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. -* `bwa/mergedLibrary/macs//consensus///` - * `*.results.txt`: Spreadsheet containing comparison-specific DESeq2 output for differential binding results across all peaks. - * `*FDR0.01.results.txt`, `*FDR0.05.results.txt`: Subset of above file for peaks that pass FDR <= 0.01 and FDR <= 0.05. - * `*FDR0.01.results.bed`, `*FDR0.05.results.bed`: BED files for peaks that pass FDR <= 0.01 and FDR <= 0.05. - * `*deseq2.plots.pdf`: MA, Volcano, clustering and scatterplots at FDR <= 0.01 and FDR <= 0.05. -* `bwa/mergedLibrary/macs//consensus//sizeFactors/` - * `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample. +- `/mergedLibrary/macs2//consensus//deseq2/` + - `*.sample.dists.txt`: Spreadsheet containing sample-to-sample distance across each consensus peak. + - `*.plots.pdf`: File containing PCA and hierarchical clustering plots. + - `*.dds.RData`: File containing R `DESeqDataSet` object generated by DESeq2, with either + an rlog or vst `assay` storing the variance-stabilised data. + - `*.rds`: Alternative version of the RData file suitable for + `readRDS` to give user control of the eventual object name. + - `*pca.vals.txt`: Matrix of values for the first 2 principal components. + - `R_sessionInfo.log`: File containing information about R, the OS and attached or loaded packages. 
+ - `/mergedLibrary/macs2//consensus//sizeFactors/` + - `*.txt`, `*.RData`: Files containing DESeq2 sizeFactors per sample.
[DESeq2](https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html) is more commonly used to perform differential expression analysis for RNA-seq datasets. However, it can also be used for ChIP-seq differential binding analysis, in which case you can imagine that instead of counts per gene for RNA-seq data we now have counts per bound region. -This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential binding. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected. For larger experiments, it may be recommended to use the `vst` transformation instead of the default `rlog` option. You can do this by providing the `--deseq2_vst` parameter to the pipeline. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. +**This pipeline uses a standardised DESeq2 analysis script to get an idea of the reproducibility within the experiment, and to assess the overall differential binding. Please note that this will not suit every experimental design, and if there are other problems with the experiment then it may not work as well as expected.** + +For larger experiments, it is recommended to use the `vst` transformation instead of the `rlog` option. This is the default behaviour and can be controlled with the `--deseq2_vst` parameter. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) for a more detailed explanation. 
![MultiQC - DESeq2 PCA plot](images/mqc_deseq2_pca_plot.png) ![MultiQC - DESeq2 sample similarity plot](images/mqc_deseq2_sample_similarity_plot.png) -By default, all possible pairwise comparisons across the groups from a particular antibody (as defined in [`design.csv`](usage.md#--design)) are performed. The DESeq2 results are generated by the pipeline in various ways. You can load up the results across all of the comparisons in a single spreadsheet, or individual folders will also be created that contain the results specific to a particular comparison. For the latter, additional files will also be generated where the intervals have been pre-filtered based on a couple of standard FDR thresholds. Please see [DESeq2 output](http://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#differential-expression-analysis) for a description of the columns generated by DESeq2. - -

- R - DESeq2 MA plot -

- -

- R - DESeq2 Volcano plot -

- ## Aggregate analysis ### Present QC for the raw read, alignment, peak and differential binding results @@ -253,10 +276,10 @@ By default, all possible pairwise comparisons across the groups from a particula
Output files -* `multiqc//` - * `multiqc_report.html`: A standalone HTML file that can be viewed in your web browser. - * `multiqc_data/`: Directory containing parsed statistics from the different tools used in the pipeline. - * `multiqc_plots/`: Directory containing static images from the report in various formats. +- `multiqc//` + - `multiqc_report.html`: A standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: Directory containing parsed statistics from the different tools used in the pipeline. + - `multiqc_plots/`: Directory containing static images from the report in various formats.
@@ -271,9 +294,9 @@ The pipeline has special steps which also allow the software versions to be repo
Output files -* `igv//` - * `igv_session.xml`: Session file that can be directly loaded into IGV. - * `igv_files.txt`: File containing a listing of the files used to create the IGV session. +- `igv//` + - `igv_session.xml`: Session file that can be directly loaded into IGV. + - `igv_files.txt`: File containing a listing of the files used to create the IGV session.
@@ -296,10 +319,16 @@ Once installed, open IGV, go to `File > Open Session` and select the `igv_sessio
Output files -* `genome/` - * A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. If using a genome from AWS iGenomes and if it exists a `README.txt` file containing information about the annotation version will also be saved in this directory. -* `genome/BWAIndex/` - * If the `--save_reference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes. +- `genome/` + - A number of genome-specific files are generated by the pipeline in order to aid in the filtering of the data, and because they are required by standard tools such as BEDTools. These can be found in this directory along with the genome fasta file which is required by IGV. If a genome from AWS iGenomes is used and a `README.txt` file containing information about the annotation version exists, it will also be saved in this directory. +- `genome/index/` + + - `bwa/`: Directory containing BWA indices. + - `bowtie2/`: Directory containing Bowtie 2 indices. + - `chromap/`: Directory containing Chromap indices. + - `star/`: Directory containing STAR indices. + + - If the `--save_reference` parameter is provided then the alignment indices generated by the pipeline will be saved in this directory. This can be quite a time-consuming process so it permits their reuse for future runs of the pipeline or for other purposes.
@@ -310,11 +339,10 @@ Reference genome-specific files can be useful to keep for the downstream process
Output files -* `pipeline_info/` - * `pipeline_report.html`, `pipeline_report.txt`, `software_versions.csv`: Reports generated by the pipeline. - * `execution_report.html`, `execution_timeline.html`, `execution_trace.txt`, `pipeline_dag.svg`: Reports generated by Nextflow. - * `design_reads.csv`, `design_controls.csv`: Reformatted design files used as input to the pipeline. - * `results_description.html`: Documentation for interpretation of results in HTML format. +- `pipeline_info/` + - Reports generated by Nextflow: `execution_report.html`, `execution_timeline.html`, `execution_trace.txt` and `pipeline_dag.dot`/`pipeline_dag.svg`. + - Reports generated by the pipeline: `pipeline_report.html`, `pipeline_report.txt` and `software_versions.yml`. The `pipeline_report*` files will only be present if the `--email` / `--email_on_fail` parameters are used when running the pipeline. + - Reformatted samplesheet files used as input to the pipeline: `samplesheet.valid.csv`.
diff --git a/docs/usage.md b/docs/usage.md index 44d0b95ba..4a6560322 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -1,590 +1,315 @@ # nf-core/chipseq: Usage -## Table of contents - -* [Table of contents](#table-of-contents) -* [Introduction](#introduction) -* [Running the pipeline](#running-the-pipeline) - * [Updating the pipeline](#updating-the-pipeline) - * [Reproducibility](#reproducibility) -* [Main arguments](#main-arguments) - * [`-profile`](#-profile) - * [`--input`](#--input) -* [Generic arguments](#generic-arguments) - * [`--single_end`](#--single_end) - * [`--seq_center`](#--seq_center) - * [`--fragment_size`](#--fragment_size) - * [`--fingerprint_bins`](#--fingerprint_bins) -* [Reference genomes](#reference-genomes) - * [`--genome` (using iGenomes)](#--genome-using-igenomes) - * [`--fasta`](#--fasta) - * [`--gtf`](#--gtf) - * [`--bwa_index`](#--bwa_index) - * [`--gene_bed`](#--gene_bed) - * [`--macs_gsize`](#--macs_gsize) - * [`--blacklist`](#--blacklist) - * [`--save_reference`](#--save_reference) - * [`--igenomes_ignore`](#--igenomes_ignore) -* [Adapter trimming](#adapter-trimming) - * [`--skip_trimming`](#--skip_trimming) - * [`--save_trimmed`](#--save_trimmed) -* [Alignments](#alignments) - * [`--bwa_min_score`](#--bwa_min_score) - * [`--keep_dups`](#--keep_dups) - * [`--keep_multi_map`](#--keep_multi_map) - * [`--save_align_intermeds`](#--save_align_intermeds) -* [Peaks](#peaks) - * [`--narrow_peak`](#--narrow_peak) - * [`--broad_cutoff`](#--broad_cutoff) - * [`--macs_fdr`](#--macs_fdr) - * [`--macs_pvalue`](#--macs_pvalue) - * [`--min_reps_consensus`](#--min_reps_consensus) - * [`--save_macs_pileup`](#--save_macs_pileup) - * [`--skip_peak_qc`](#--skip_peak_qc) - * [`--skip_peak_annotation`](#--skip_peak_annotation) - * [`--skip_consensus_peaks`](#--skip_consensus_peaks) -* [Differential analysis](#differential_analysis) - * [`--deseq2_vst`](#--deseq2_vst) - * [`--skip_diff_analysis`](#--skip_diff_analysis) -* [Skipping QC 
steps](#skipping-qc-steps) -* [Job resources](#job-resources) - * [Automatic resubmission](#automatic-resubmission) - * [Custom resource requests](#custom-resource-requests) -* [AWS Batch specific parameters](#aws-batch-specific-parameters) - * [`--awsqueue`](#--awsqueue) - * [`--awsregion`](#--awsregion) - * [`--awscli`](#--awscli) -* [Other command line parameters](#other-command-line-parameters) - * [`--outdir`](#--outdir) - * [`--publish_dir_mode`](#--publish_dir_mode) - * [`--email`](#--email) - * [`--email_on_fail`](#--email_on_fail) - * [`--max_multiqc_email_size`](#--max_multiqc_email_size) - * [`-name`](#-name) - * [`-resume`](#-resume) - * [`-c`](#-c) - * [`--custom_config_version`](#--custom_config_version) - * [`--custom_config_base`](#--custom_config_base) - * [`--max_memory`](#--max_memory) - * [`--max_time`](#--max_time) - * [`--max_cpus`](#--max_cpus) - * [`--plaintext_email`](#--plaintext_email) - * [`--monochrome_logs`](#--monochrome_logs) - * [`--multiqc_config`](#--multiqc_config) - -## Introduction - -Nextflow handles job submissions on SLURM or other environments, and supervises running the jobs. Thus the Nextflow process must run until the pipeline is finished. We recommend that you put the process running in the background through `screen` / `tmux` or similar tool. Alternatively you can run nextflow within a cluster job submitted your job scheduler. - -It is recommended to limit the Nextflow Java virtual machines memory. 
We recommend adding the following line to your environment (typically in `~/.bashrc` or `~./bash_profile`): +## :warning: Please read this documentation on the nf-core website: [https://nf-co.re/chipseq/usage](https://nf-co.re/chipseq/usage) -```bash -NXF_OPTS='-Xms1g -Xmx4g' -``` +> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Running the pipeline +## Samplesheet input -The typical command for running the pipeline is as follows: +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row as shown in the examples below. ```bash -nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker +--input '[path to samplesheet file]' ``` -This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. - -Note that the pipeline will create the following files in your working directory: - -```bash -work # Directory containing the nextflow working files -results # Finished results (configurable, see below) -.nextflow_log # Log file from Nextflow -# Other nextflow hidden files, eg. history of pipeline runs and old logs. +### Multiple runs of the same library + +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis. 
Below is an example where the samples called `WT_BCATENIN_IP_REP2` and `WT_INPUT_REP2` have been re-sequenced multiple times: + +```console +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L003_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L002_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, ``` -### Updating the pipeline - -When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: - -```bash -nextflow pull nf-core/chipseq +### Full design + +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 5 columns to match those defined in the table below. + +The `antibody` column is required to separate the downstream consensus peak merging for different antibodies. It's not advisable to generate a consensus peak set across different antibodies especially if their binding patterns are inherently different e.g. narrow transcription factors and broad histone marks. + +The `control` column should be the `sample` identifier for the controls for any given IP. 
+ +A final design file may look something like the one below. This is for two antibodies and associated controls, where the `WT_BCATENIN_IP_REP2` and `NAIVE_BCATENIN_IP_REP2` samples have been sequenced twice: + +```console +sample,fastq_1,fastq_2,antibody,control +WT_BCATENIN_IP_REP1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT +WT_BCATENIN_IP_REP3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT +NAIVE_BCATENIN_IP_REP1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +NAIVE_BCATENIN_IP_REP3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT +WT_TCF4_IP_REP1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +WT_TCF4_IP_REP3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT +NAIVE_TCF4_IP_REP1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +NAIVE_TCF4_IP_REP3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT +WT_INPUT_REP1,BLA203A6_S32_L006_R1_001.fastq.gz,,, +WT_INPUT_REP2,BLA203A30_S21_L001_R1_001.fastq.gz,,, +WT_INPUT_REP3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +NAIVE_INPUT_REP1,BLA203A12_S3_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP2,BLA203A48_S39_L001_R1_001.fastq.gz,,, +NAIVE_INPUT_REP3,BLA203A49_S1_L006_R1_001.fastq.gz,,, ``` -### Reproducibility +| Column | Description | +| ---------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. 
This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `antibody` | Antibody name. This is required to segregate downstream analysis for different antibodies. Required when `control` is specified. | +| `control` | Sample name for control sample. | -It's a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. +Example design files have been provided with the pipeline for [paired-end](../assets/samplesheet_pe.csv) and [single-end](../assets/samplesheet_se.csv) data. -First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. +> **NB:** The `group` and `replicate` columns were replaced with a single `sample` column as of v2.0 of the pipeline. The `sample` column is essentially a concatenation of the `group` and `replicate` columns. If all values of `sample` have the same number of underscores, fields defined by these underscore-separated names may be used in the PCA plots produced by the pipeline, to regain the ability to represent different groupings. -This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. 
+## Reference genome files -## Main arguments +The minimum reference genome requirements are a FASTA and GTF file, all other files required to run the pipeline can be generated from these files. However, it is more storage and compute friendly if you are able to re-use reference genome files as efficiently as possible. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build new indices (e.g. those unavailable on [AWS iGenomes](https://nf-co.re/usage/reference_genomes)) so that you can save them somewhere locally. The index building step can be quite a time-consuming process and it permits their reuse for future runs of the pipeline to save disk space. You can then either provide the appropriate reference genome files on the command-line via the appropriate parameters (e.g. `--bwa_index '/path/to/bwa/index/'`) or via a custom config file. -### `-profile` +- If `--genome` is provided then the FASTA and GTF files (and existing indices) will be automatically obtained from AWS-iGenomes unless these have already been downloaded locally in the path specified by `--igenomes_base`. +- If `--gene_bed` is not provided then it will be generated from the GTF file. -Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. +> **NB:** Compressed reference files are also supported by the pipeline i.e. standard files with the `.gz` extension and indices folders with the `tar.gz` extension. -Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Conda) - see below. +## Blacklist bed files -> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. 
+The blacklist bed files were obtained using the commands below: -The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). - -Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! -They are loaded in sequence, so later profiles can overwrite earlier profiles. - -If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. +```console +cd .. +mkdir -p v1.0 +cd v1.0 +wget -L https://www.encodeproject.org/files/ENCFF001TDO/@@download/ENCFF001TDO.bed.gz && gunzip ENCFF001TDO.bed.gz && mv ENCFF001TDO.bed hg19-blacklist.v1.bed -* `docker` - * A generic configuration profile to be used with [Docker](http://docker.com/) - * Pulls software from dockerhub: [`nfcore/chipseq`](http://hub.docker.com/r/nfcore/chipseq/) -* `singularity` - * A generic configuration profile to be used with [Singularity](http://singularity.lbl.gov/) - * Pulls software from DockerHub: [`nfcore/chipseq`](http://hub.docker.com/r/nfcore/chipseq/) -* `conda` - * Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker or Singularity.
- * A generic configuration profile to be used with [Conda](https://conda.io/docs/) - * Pulls most software from [Bioconda](https://bioconda.github.io/) -* `test` - * A profile with a complete configuration for automated testing - * Includes links to test data so needs no other parameters +mkdir -p assets/blacklists/v2.0/ +cd assets/blacklists/v2.0/ +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/ce10-blacklist.v2.bed.gz && gunzip ce10-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/ce11-blacklist.v2.bed.gz && gunzip ce11-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/dm3-blacklist.v2.bed.gz && gunzip dm3-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/dm6-blacklist.v2.bed.gz && gunzip dm6-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/hg19-blacklist.v2.bed.gz && gunzip hg19-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/hg38-blacklist.v2.bed.gz && gunzip hg38-blacklist.v2.bed.gz +wget -L https://raw.githubusercontent.com/Boyle-Lab/Blacklist/master/lists/mm10-blacklist.v2.bed.gz && gunzip mm10-blacklist.v2.bed.gz -### `--input` - -You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 6 columns, and a header row as shown in the examples below. - -```bash ---input '[path to design file]' +cd .. 
+mkdir -p v3.0 +cd v3.0 +wget -L https://www.encodeproject.org/files/ENCFF356LFX/@@download/ENCFF356LFX.bed.gz && gunzip ENCFF356LFX.bed.gz && mv ENCFF356LFX.bed hg38-blacklist.v3.bed ``` -#### Multiple replicates - -The `group` identifier should be identical when you have multiple replicates from the same experimental group, just increment the `replicate` identifier appropriately. The first replicate value for any given experimental group must be 1. - -The `antibody` column is required to separate the downstream consensus peak merging and differential analysis for different antibodies. Its not advisable to generate a consensus peak set across different antibodies especially if their binding patterns are inherently different e.g. narrow transcription factors and broad histone marks. +> **NB:** A detailed description of the different versions of the files can be found [here](https://sites.google.com/site/anshulkundaje/projects/blacklists). Also, to see which blacklist bed files are assigned by default to the respective reference genome check the [igenomes.config](https://github.com/nf-core/chipseq/blob/master/conf/igenomes.config). -The `control` column should be the `group` identifier for the controls for any given IP. The pipeline will automatically pair the inputs based on replicate identifier (i.e. where you have an equal number of replicates for your IP's and controls), alternatively, the first control sample in that group will be selected. +## Running the pipeline -In the single-end design below there are triplicate samples for the `WT_BCATENIN_IP` group along with triplicate samples for their corresponding `WT_INPUT` samples.
+The typical command for running the pipeline is as follows: ```bash -group,replicate,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP,1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_INPUT,1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT,2,BLA203A30_S21_L002_R1_001.fastq.gz,,, -WT_INPUT,3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +nextflow run nf-core/chipseq --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker ``` -#### Multiple runs of the same library +This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. -Both the `group` and `replicate` identifiers should be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will perform the alignments in parallel, and subsequently merge them before further analysis.
Below is an example where the second replicate of the `WT_BCATENIN_IP` and `WT_INPUT` groups has been re-sequenced multiple times: +Note that the pipeline will create the following files in your working directory: ```bash -group,replicate,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP,1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L003_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_INPUT,1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT,2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT,2,BLA203A30_S21_L002_R1_001.fastq.gz,,, -WT_INPUT,3,BLA203A31_S21_L003_R1_001.fastq.gz,,, +work # Directory containing the nextflow working files +<OUTDIR> # Finished results in specified location (defined with --outdir) +.nextflow_log # Log file from Nextflow +# Other nextflow hidden files, eg. history of pipeline runs and old logs. ``` -#### Full design +### Updating the pipeline -A final design file may look something like the one below. This is for two antibodies and associated controls in triplicate, where the second replicate of the `WT_BCATENIN_IP` and `NAIVE_BCATENIN_IP` group has been sequenced twice: +When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since.
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: ```bash -group,replicate,fastq_1,fastq_2,antibody,control -WT_BCATENIN_IP,1,BLA203A1_S27_L006_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,2,BLA203A25_S16_L002_R1_001.fastq.gz,,BCATENIN,WT_INPUT -WT_BCATENIN_IP,3,BLA203A49_S40_L001_R1_001.fastq.gz,,BCATENIN,WT_INPUT -NAIVE_BCATENIN_IP,1,BLA203A7_S60_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,2,BLA203A43_S34_L002_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -NAIVE_BCATENIN_IP,3,BLA203A64_S55_L001_R1_001.fastq.gz,,BCATENIN,NAIVE_INPUT -WT_TCF4_IP,1,BLA203A3_S29_L006_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP,2,BLA203A27_S18_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -WT_TCF4_IP,3,BLA203A51_S42_L001_R1_001.fastq.gz,,TCF4,WT_INPUT -NAIVE_TCF4_IP,1,BLA203A9_S62_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,2,BLA203A45_S36_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -NAIVE_TCF4_IP,3,BLA203A66_S57_L001_R1_001.fastq.gz,,TCF4,NAIVE_INPUT -WT_INPUT,1,BLA203A6_S32_L006_R1_001.fastq.gz,,, -WT_INPUT,2,BLA203A30_S21_L001_R1_001.fastq.gz,,, -WT_INPUT,3,BLA203A31_S21_L003_R1_001.fastq.gz,,, -NAIVE_INPUT,1,BLA203A12_S3_L001_R1_001.fastq.gz,,, -NAIVE_INPUT,2,BLA203A48_S39_L001_R1_001.fastq.gz,,, -NAIVE_INPUT,3,BLA203A49_S1_L006_R1_001.fastq.gz,,, +nextflow pull nf-core/chipseq ``` -| Column | Description | -|-------------|--------------------------------------------------------------------------------------------------------------------------------------------------| -| `group` | Group/condition identifier for sample. This will be identical for re-sequenced libraries and replicate samples from the same experimental group. | -| `replicate` | Integer representing replicate number. This will be identical for re-sequenced libraries. 
Must start from `1..`. | -| `fastq_1` | Full path to FastQ file for read 1. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for read 2. File has to be zipped and have the extension ".fastq.gz" or ".fq.gz". | -| `antibody` | Antibody name. This is required to segregate downstream analysis for different antibodies. Required when `control` is specified. | -| `control` | Group identifier for control sample. The pipeline will automatically select the control sample with the same replicate identifier as the IP. | - -Example design files have been provided with the pipeline for [paired-end](../assets/design_pe.csv) and [single-end](../assets/design_se.csv) data. - -## Generic arguments - -### `--single_end` - -By default, the pipeline expects paired-end data. If you have single-end data, specify `--single_end` on the command line when you launch the pipeline. - -It is not possible to run a mixture of single-end and paired-end files in one run. - -### `--seq_center` - -Sequencing center information that will be added to read groups in BAM files. - -### `--fragment_size` - -Number of base pairs to extend single-end reads when creating bigWig files (Default: `200`). - -### `--fingerprint_bins` +### Reproducibility -Number of genomic bins to use when generating the deepTools fingerprint plot. Larger numbers will give a smoother profile, but take longer to run (Default: `500000`). +It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since. -## Reference genomes +First, go to the [nf-core/chipseq releases page](https://github.com/nf-core/chipseq/releases) and find the latest version number - numeric only (eg. `1.2.2`). 
Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.2.2`. -The pipeline config files come bundled with paths to the illumina iGenomes reference index files. If running with docker or AWS, the configuration is set up to use the [AWS-iGenomes](https://ewels.github.io/AWS-iGenomes/) resource. +This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future. -### `--genome` (using iGenomes) +## Core Nextflow arguments -There are 31 different species supported in the iGenomes references. To run the pipeline, you must specify which to use with the `--genome` flag. +> **NB:** These options are part of Nextflow and use a _single_ hyphen (pipeline parameters use a double-hyphen). -You can find the keys to specify the genomes in the [iGenomes config file](../conf/igenomes.config). Common genomes that are supported are: +### `-profile` -* Human - * `--genome GRCh37` -* Mouse - * `--genome GRCm38` -* _Drosophila_ - * `--genome BDGP6` -* _S. cerevisiae_ - * `--genome 'R64-1-1'` +Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments. -> There are numerous others - check the config file for more. +Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g. [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
-Note that you can use the same configuration setup to save sets of reference files for your own use, even if they are not part of the iGenomes resource. See the [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for instructions on where to save such a file. +> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported. -The syntax for this reference configuration is as follows: +The pipeline also dynamically loads configurations from [https://github.com/nf-core/configs](https://github.com/nf-core/configs) when it runs, making multiple config profiles for various institutional clusters available at run time. For more information and to see if your system is available in these configs please see the [nf-core/configs documentation](https://github.com/nf-core/configs#documentation). -```nextflow -params { - genomes { - 'GRCh37' { - fasta = '' // Used if no star index given - } - // Any number of additional genomes, key is used with --genome - } -} -``` +Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important! +They are loaded in sequence, so later profiles can overwrite earlier profiles. -### `--fasta` +If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended. -Full path to fasta file containing reference genome (*mandatory* if `--genome` is not specified). If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs. 
+- `docker` + - A generic configuration profile to be used with [Docker](https://docker.com/) +- `singularity` + - A generic configuration profile to be used with [Singularity](https://sylabs.io/docs/) +- `podman` + - A generic configuration profile to be used with [Podman](https://podman.io/) +- `shifter` + - A generic configuration profile to be used with [Shifter](https://nersc.gitlab.io/development/shifter/how-to-use/) +- `charliecloud` + - A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/) +- `conda` + - A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud. +- `test` + - A profile with a complete configuration for automated testing + - Includes links to test data so needs no other parameters -```bash ---fasta '[path to FASTA reference]' -``` +### `-resume` -### `--gtf` +Specify this when restarting a pipeline. Nextflow will use cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. For input to be considered the same, not only the names must be identical but the files' contents as well. For more info about this parameter, see [this blog post](https://www.nextflow.io/blog/2019/demystifying-nextflow-resume.html). -The full path to GTF file for annotating peaks (*mandatory* if `--genome` is not specified). Note that the GTF file should resemble the Ensembl format. +You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. -```bash ---gtf '[path to GTF file]' -``` +### `-c` -### `--bwa_index` +Specify the path to a specific config file (this is a core Nextflow command). See the [nf-core website documentation](https://nf-co.re/usage/configuration) for more information. 
-Full path to an existing BWA index for your reference genome including the base name for the index. +## Custom configuration -```bash ---bwa_index '[directory containing BWA index]/genome.fa' -``` +### Resource requests -### `--gene_bed` +Whilst the default requirements set within the pipeline will hopefully work for most people and with most input data, you may find that you want to customise the compute resources that the pipeline requests. Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with any of the error codes specified [here](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L18) it will automatically be resubmitted with higher requests (2 x original, then 3 x original). If it still fails after the third attempt then the pipeline execution is stopped. -The full path to BED file for genome-wide gene intervals. This will be created from the GTF file if not specified. +For example, if the nf-core/rnaseq pipeline is failing after multiple re-submissions of the `STAR_ALIGN` process due to an exit code of `137` this would indicate that there is an out of memory issue: -```bash ---gene_bed '[path to gene BED file]' -``` +```console +[62/149eb0] NOTE: Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -- Execution is retried (1) +Error executing process > 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)' -### `--macs_gsize` +Caused by: + Process `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN (WT_REP1)` terminated with an error exit status (137) -[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. These have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. 
For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped. +Command executed: + STAR \ + --genomeDir star \ + --readFilesIn WT_REP1_trimmed.fq.gz \ + --runThreadN 2 \ + --outFileNamePrefix WT_REP1. \ + -```bash ---macs_gsize 2.7e9 -``` +Command exit status: + 137 -### `--blacklist` +Command output: + (empty) -If provided, alignments that overlap with the regions in this file will be filtered out (see [ENCODE blacklists](https://sites.google.com/site/anshulkundaje/projects/blacklists)). The file should be in BED format. Blacklisted regions for *GRCh37*, *GRCh38*, *GRCm38*, *hg19*, *hg38*, *mm10* are bundled with the pipeline in the [`blacklists`](../assets/blacklists/) directory, and as such will be automatically used if any of those genomes are specified with the `--genome` parameter. +Command error: + .command.sh: line 9: 30 Killed STAR --genomeDir star --readFilesIn WT_REP1_trimmed.fq.gz --runThreadN 2 --outFileNamePrefix WT_REP1. +Work dir: + /home/pipelinetest/work/9d/172ca5881234073e8d76f2a19c88fb -```bash ---blacklist '[path to blacklisted regions]' +Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run` ``` -### `--save_reference` - -If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times (Default: false). - -### `--igenomes_ignore` - -Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config` (Default: false). - -## Adapter trimming - -The pipeline accepts a number of parameters to change how the trimming is done, according to your data type. 
-You can specify custom trimming parameters as follows: - -* `--clip_r1 [int]` - * Instructs Trim Galore to remove [int] bp from the 5' end of read 1 (for single-end reads). -* `--clip_r2 [int]` - * Instructs Trim Galore to remove [int] bp from the 5' end of read 2 (paired-end reads only). -* `--three_prime_clip_r1 [int]` - * Instructs Trim Galore to remove [int] bp from the 3' end of read 1 _AFTER_ adapter/quality trimming has been -* `--three_prime_clip_r2 [int]` - * Instructs Trim Galore to remove [int] bp from the 3' end of read 2 _AFTER_ adapter/quality trimming has been performed. -* `--trim_nextseq [int]` - * This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases. - -### `--skip_trimming` +To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN). +We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`. +If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9). 
+The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements. +The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB. +Providing you haven't set any other standard nf-core parameters to **cap** the [maximum resources](https://nf-co.re/usage/configuration#max-resources) used by the pipeline then we can try and bypass the `STAR_ALIGN` process failure by creating a custom config file that sets at least 72GB of memory, in this case increased to 100GB. +The custom config below can then be provided to the pipeline via the [`-c`](#-c) parameter as highlighted in previous sections. -Skip the adapter trimming step. Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data (Default: false). - -### `--save_trimmed` - -By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete (Default: false). - -## Alignments - -### `--bwa_min_score` - -Don’t output BWA MEM alignments with score lower than this parameter (Default: false). - -### `--keep_dups` - -Duplicate reads are not filtered from alignments (Default: false). - -### `--keep_multi_map` - -Reads mapping to multiple locations in the genome are not filtered from alignments (Default: false). - -### `--save_align_intermeds` - -By default, intermediate BAM files will not be saved. The final BAM files created after the appropriate filtering step are always saved to limit storage usage. 
Set to true to also save other intermediate BAM files (Default: false). - -## Peaks - -### `--narrow_peak` - -MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. Specify this flag to call peaks in narrowPeak mode (Default: false). - -### `--broad_cutoff` - -Specifies broad cut-off value for MACS2. Only used when `--narrow_peak` isnt specified (Default: `0.1`). - -### `--macs_fdr` - -Minimum FDR (q-value) cutoff for peak detection, `--macs_fdr` and `--macs_pvalue` are mutually exclusive (Default: false). - -### `--macs_pvalue` - -p-value cutoff for peak detection, `--macs_fdr` and `--macs_pvalue` are mutually exclusive (Default: false). If `--macs_pvalue` cutoff is set, q-value will not be calculated and reported as -1 in the final .xls file. - -### `--min_reps_consensus` - -Number of biological replicates required from a given condition for a peak to contribute to a consensus peak . If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a "reproducible" set of consensus of peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded (Default: 1). - -```bash --- min_reps_consensus 1 +```nextflow +process { + withName: 'NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN' { + memory = 100.GB + } +} ``` -### `--save_macs_pileup` - -Instruct MACS2 to create bedGraph files using the `-B --SPMR` parameters (Default: false). - -### `--skip_peak_qc` - -Skip MACS2 peak QC plot generation (Default: false). - -### `--skip_peak_annotation` - -Skip annotation of MACS2 and consensus peaks with HOMER (Default: false). - -### `--skip_consensus_peaks` - -Skip consensus peak generation, annotation and counting (Default: false). 
- -## Differential analysis - -### `--deseq2_vst` - -Use `vst` transformation instead of `rlog` with DESeq2. See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization) (Default: false). - -### `--skip_diff_analysis` - -Skip differential binding analysis with DESeq2 (Default: false). - -## Skipping QC steps - -The pipeline contains a large number of quality control steps. Sometimes, it may not be desirable to run all of them if time and compute resources are limited. -The following options make this easy: - -| Step | Description | -|---------------------------|------------------------------------| -| `--skip_fastqc` | Skip FastQC | -| `--skip_picard_metrics` | Skip Picard CollectMultipleMetrics | -| `--skip_preseq` | Skip Preseq | -| `--skip_plot_profile` | Skip deepTools plotProfile | -| `--skip_plot_fingerprint` | Skip deepTools plotFingerprint | -| `--skip_spp` | Skip Phantompeakqualtools | -| `--skip_igv` | Skip IGV | -| `--skip_multiqc` | Skip MultiQC | - -## Job resources +> **NB:** We specify the full process name i.e. `NFCORE_RNASEQ:RNASEQ:ALIGN_STAR:STAR_ALIGN` in the config file because this takes priority over the short name (`STAR_ALIGN`) and allows existing configuration using the full process name to be correctly overridden. +> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly. -### Automatic resubmission +### Updating containers -Each step in the pipeline has a default set of requirements for number of CPUs, memory and time. For most of the steps in the pipeline, if the job exits with an error code of `143` (exceeded requested resources) it will automatically resubmit with higher requests (2 x original, then 3 x original). If it still fails after three times then the pipeline is stopped. 
+The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration. For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently it doesn't make sense to re-release nf-core/viralrecon every time a new version of Pangolin has been released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`. -### Custom resource requests +1. Check the default version used by the pipeline in the module file for [Pangolin](https://github.com/nf-core/viralrecon/blob/a85d5969f9025409e3618d6c280ef15ce417df65/modules/nf-core/software/pangolin/main.nf#L14-L19) +2. Find the latest version of the Biocontainer available on [Quay.io](https://quay.io/repository/biocontainers/pangolin?tag=latest&tab=tags) +3. Create the custom config accordingly: -Wherever process-specific requirements are set in the pipeline, the default value can be changed by creating a custom config file. See the files hosted at [`nf-core/configs`](https://github.com/nf-core/configs/tree/master/conf) for examples. + - For Docker: -If you are likely to be running `nf-core` pipelines regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository.
Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter (see definition below). You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. + ```nextflow + process { + withName: PANGOLIN { + container = 'quay.io/biocontainers/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` -If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack). + - For Singularity: -## AWS Batch specific parameters + ```nextflow + process { + withName: PANGOLIN { + container = 'https://depot.galaxyproject.org/singularity/pangolin:3.0.5--pyhdfd78af_0' + } + } + ``` -Running the pipeline on AWS Batch requires a couple of specific parameters to be set according to your AWS Batch configuration. Please use [`-profile awsbatch`](https://github.com/nf-core/configs/blob/master/conf/awsbatch.config) and then specify all of the following parameters. + - For Conda: -### `--awsqueue` + ```nextflow + process { + withName: PANGOLIN { + conda = 'bioconda::pangolin=3.0.5' + } + } + ``` -The JobQueue that you intend to use on AWS Batch. +> **NB:** If you wish to periodically update individual tool-specific results (e.g. Pangolin) generated by the pipeline then you must ensure to keep the `work/` directory otherwise the `-resume` ability of the pipeline will be compromised and it will restart from scratch. -### `--awsregion` +### nf-core/configs -The AWS region in which to run your job. Default is set to `eu-west-1` but can be adjusted to your needs. 
+In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. -### `--awscli` +See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config.html) for more information about creating your own configuration files. -The [AWS CLI](https://www.nextflow.io/docs/latest/awscloud.html#aws-cli-installation) path in your custom AMI. Default: `/home/ec2-user/miniconda/bin/aws`. +If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs). -Please make sure to also set the `-w/--work-dir` and `--outdir` parameters to a S3 storage bucket of your choice - you'll get an error message notifying you if you didn't. +## Running in the background -## Other command line parameters +Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished. -### `--outdir` +The Nextflow `-bg` flag launches Nextflow in the background, detached from your terminal so that the workflow does not stop if you log out of your session. The logs are saved to a file. -The output directory where the results will be saved. 
+Alternatively, you can use `screen` / `tmux` or a similar tool to create a detached session which you can log back into at a later time. +Some HPC setups also allow you to run nextflow within a cluster job submitted via your job scheduler (from where it submits more jobs). -### `--publish_dir_mode` +## Nextflow memory requirements -Value passed to Nextflow [`publishDir`](https://www.nextflow.io/docs/latest/process.html#publishdir) directive for publishing results in the output directory. Available: 'symlink', 'rellink', 'link', 'copy', 'copyNoFollow' and 'move' (Default: 'copy'). - -### `--email` - -Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run. - -### `--email_on_fail` - -This works exactly as with `--email`, except emails are only sent if the workflow is not successful. - -### `--max_multiqc_email_size` - -Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB). - -### `-name` - -Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic. - -This is used in the MultiQC report (if not default) and in the summary HTML / e-mail (always). - -**NB:** Single hyphen (core Nextflow option) - -### `-resume` - -Specify this when restarting a pipeline. Nextflow will used cached results from any pipeline steps where the inputs are the same, continuing from where it got to previously. - -You can also supply a run name to resume a specific run: `-resume [run-name]`. Use the `nextflow log` command to show previous run names. - -**NB:** Single hyphen (core Nextflow option) - -### `-c` - -Specify the path to a specific config file (this is a core NextFlow command).
- -**NB:** Single hyphen (core Nextflow option) - -Note - you can use this to override pipeline defaults. - -### `--custom_config_version` - -Provide git commit id for custom Institutional configs hosted at `nf-core/configs`. This was implemented for reproducibility purposes. Default: `master`. +In some cases, the Nextflow Java virtual machines can start to request a large amount of memory. +We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~/.bash_profile`): ```bash -## Download and use config file with following git commid id ---custom_config_version d52db660777c4bf36546ddb188ec530c3ada1b96 -``` - -### `--custom_config_base` - -If you're running offline, nextflow will not be able to fetch the institutional config files -from the internet. If you don't need them, then this is not a problem. If you do need them, -you should download the files from the repo and tell nextflow where to find them with the -`custom_config_base` option. For example: - -```bash -## Download and unzip the config files -cd /path/to/my/configs -wget https://github.com/nf-core/configs/archive/master.zip -unzip master.zip - -## Run the pipeline -cd /path/to/my/data -nextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/ +NXF_OPTS='-Xms1g -Xmx4g' ``` - -> Note that the nf-core/tools helper package has a `download` command to download all required pipeline -> files + singularity containers + institutional configs in one go for you, to make this process easier. - -### `--max_memory` - -Use to set a top-limit for the default memory requirement for each process. -Should be a string in the format integer-unit. eg. `--max_memory '8.GB'` - -### `--max_time` - -Use to set a top-limit for the default time requirement for each process. -Should be a string in the format integer-unit. eg. `--max_time '2.h'` - -### `--max_cpus` - -Use to set a top-limit for the default CPU requirement for each process.
-Should be a string in the format integer-unit. eg. `--max_cpus 1` - -### `--plaintext_email` - -Set to receive plain-text e-mails instead of HTML formatted. - -### `--monochrome_logs` - -Set to disable colourful command line output and live life in monochrome. - -### `--multiqc_config` - -Specify a path to a custom MultiQC configuration file. diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 763fe309f..000000000 --- a/environment.yml +++ /dev/null @@ -1,47 +0,0 @@ -# You can use this file to create a conda environment for this pipeline: -# conda env create -f environment.yml -name: nf-core-chipseq-1.2.2 -channels: - - conda-forge - - bioconda - - defaults -dependencies: - ## conda-forge packages - - conda-forge::python=3.7.6 - - conda-forge::markdown=3.2.2 - - conda-forge::pymdown-extensions=7.1 - - conda-forge::pygments=2.6.1 - - conda-forge::r-base=3.6.3 - - conda-forge::r-optparse=1.6.6 - - conda-forge::r-rcolorbrewer=1.1_2 - - conda-forge::r-reshape2=1.4.4 - - conda-forge::r-ggplot2=3.3.2 - - conda-forge::r-tidyr=1.1.0 - - conda-forge::r-scales=1.1.1 - - conda-forge::r-pheatmap=1.0.12 - - conda-forge::r-lattice=0.20_41 - - conda-forge::r-upsetr=1.4.0 - - conda-forge::r-xfun=0.20 - - conda-forge::gawk=5.1.0 - - conda-forge::pigz=2.3.4 ## Required for TrimGalore multi-threading - - ## bioconda packages - - bioconda::fastqc=0.11.9 - - bioconda::trim-galore=0.6.5 - - bioconda::bwa=0.7.17 - - bioconda::samtools=1.10 - - bioconda::picard=2.23.1 - - bioconda::bamtools=2.5.1 - - bioconda::pysam=0.15.3 - - bioconda::bedtools=2.29.2 - - bioconda::ucsc-bedgraphtobigwig=357 - - bioconda::deeptools=3.4.3 - - bioconda::macs2=2.2.7.1 - - bioconda::homer=4.11 - - bioconda::subread=2.0.1 - - bioconda::phantompeakqualtools=1.2.2 - - bioconda::preseq=2.0.3 - - bioconda::multiqc=1.9 - - bioconda::bioconductor-biocparallel=1.20.0 - - bioconda::bioconductor-deseq2=1.26.0 - - bioconda::bioconductor-vsn=3.54.0 diff --git a/lib/NfcoreSchema.groovy 
b/lib/NfcoreSchema.groovy new file mode 100755 index 000000000..b3d092f80 --- /dev/null +++ b/lib/NfcoreSchema.groovy @@ -0,0 +1,529 @@ +// +// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. +// + +import org.everit.json.schema.Schema +import org.everit.json.schema.loader.SchemaLoader +import org.everit.json.schema.ValidationException +import org.json.JSONObject +import org.json.JSONTokener +import org.json.JSONArray +import groovy.json.JsonSlurper +import groovy.json.JsonBuilder + +class NfcoreSchema { + + // + // Resolve Schema path relative to main workflow directory + // + public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { + return "${workflow.projectDir}/${schema_filename}" + } + + // + // Function to loop over all parameters defined in schema and check + // whether the given parameters adhere to the specifications + // + /* groovylint-disable-next-line UnusedPrivateMethodParameter */ + public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { + def has_error = false + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Check for nextflow core params and unexpected params + def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text + def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') + def nf_params = [ + // Options for base `nextflow` command + 'bg', + 'c', + 'C', + 'config', + 'd', + 'D', + 'dockerize', + 'h', + 'log', + 'q', + 'quiet', + 'syslog', + 'v', + 'version', + + // Options for `nextflow run` command + 'ansi', + 'ansi-log', + 'bg', + 'bucket-dir', + 'c', + 'cache', + 'config', + 'dsl2', + 'dump-channels', + 'dump-hashes', + 'E', + 'entry', + 'latest', + 'lib', + 'main-script', + 'N', + 'name', + 'offline', + 'params-file', + 'pi', + 'plugins', + 'poll-interval', + 'pool-size', + 'profile', + 'ps', + 
'qs', + 'queue-size', + 'r', + 'resume', + 'revision', + 'stdin', + 'stub', + 'stub-run', + 'test', + 'w', + 'with-charliecloud', + 'with-conda', + 'with-dag', + 'with-docker', + 'with-mpi', + 'with-notification', + 'with-podman', + 'with-report', + 'with-singularity', + 'with-timeline', + 'with-tower', + 'with-trace', + 'with-weblog', + 'without-docker', + 'without-podman', + 'work-dir' + ] + def unexpectedParams = [] + + // Collect expected parameters from the schema + def expectedParams = [] + def enums = [:] + for (group in schemaParams) { + for (p in group.value['properties']) { + expectedParams.push(p.key) + if (group.value['properties'][p.key].containsKey('enum')) { + enums[p.key] = group.value['properties'][p.key]['enum'] + } + } + } + + for (specifiedParam in params.keySet()) { + // nextflow params + if (nf_params.contains(specifiedParam)) { + log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. Please resubmit with '-${specifiedParam}'" + has_error = true + } + // unexpected params + def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' + def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } + def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() + def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) + if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { + // Temporarily remove camelCase/camel-case params #1035 + def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} + if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ + unexpectedParams.push(specifiedParam) + } + } + } + + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// + // Validate parameters against the schema + InputStream input_stream = 
new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() + JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) + + // Remove anything that's in params.schema_ignore_params + raw_schema = removeIgnoredParams(raw_schema, params) + + Schema schema = SchemaLoader.load(raw_schema) + + // Clean the parameters + def cleanedParams = cleanParameters(params) + + // Convert to JSONObject + def jsonParams = new JsonBuilder(cleanedParams) + JSONObject params_json = new JSONObject(jsonParams.toString()) + + // Validate + try { + schema.validate(params_json) + } catch (ValidationException e) { + println '' + log.error 'ERROR: Validation of pipeline parameters failed!' + JSONObject exceptionJSON = e.toJSON() + printExceptions(exceptionJSON, params_json, log, enums) + println '' + has_error = true + } + + // Check for unexpected parameters + if (unexpectedParams.size() > 0) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + println '' + def warn_msg = 'Found unexpected parameters:' + for (unexpectedParam in unexpectedParams) { + warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" + } + log.warn warn_msg + log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" + println '' + } + + if (has_error) { + System.exit(1) + } + } + + // + // Beautify parameters for --help + // + public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + Integer num_hidden = 0 + String output = '' + output += 'Typical pipeline command:\n\n' + output += " ${colors.cyan}${command}${colors.reset}\n\n" + Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + Integer max_chars = paramsMaxChars(params_map) + 1 + Integer desc_indent = max_chars + 14 + Integer dec_linewidth = 160 - desc_indent + for (group in 
params_map.keySet()) { + Integer num_params = 0 + String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (group_params.get(param).hidden && !params.show_hidden_params) { + num_hidden += 1 + continue; + } + def type = '[' + group_params.get(param).type + ']' + def description = group_params.get(param).description + def defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' + def description_default = description + colors.dim + defaultValue + colors.reset + // Wrap long description texts + // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap + if (description_default.length() > dec_linewidth){ + List olines = [] + String oline = "" // " " * indent + description_default.split(" ").each() { wrd -> + if ((oline.size() + wrd.size()) <= dec_linewidth) { + oline += wrd + " " + } else { + olines += oline + oline = wrd + " " + } + } + olines += oline + description_default = olines.join("\n" + " " * desc_indent) + } + group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' + num_params += 1 + } + group_output += '\n' + if (num_params > 0){ + output += group_output + } + } + if (num_hidden > 0){ + output += colors.dim + "!! 
Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset + } + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Groovy Map summarising parameters/workflow options used by the pipeline + // + public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { + // Get a selection of core Nextflow workflow options + def Map workflow_summary = [:] + if (workflow.revision) { + workflow_summary['revision'] = workflow.revision + } + workflow_summary['runName'] = workflow.runName + if (workflow.containerEngine) { + workflow_summary['containerEngine'] = workflow.containerEngine + } + if (workflow.container) { + workflow_summary['container'] = workflow.container + } + workflow_summary['launchDir'] = workflow.launchDir + workflow_summary['workDir'] = workflow.workDir + workflow_summary['projectDir'] = workflow.projectDir + workflow_summary['userName'] = workflow.userName + workflow_summary['profile'] = workflow.profile + workflow_summary['configFiles'] = workflow.configFiles.join(', ') + + // Get pipeline parameters defined in JSON Schema + def Map params_summary = [:] + def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) + for (group in params_map.keySet()) { + def sub_params = new LinkedHashMap() + def group_params = params_map.get(group) // This gets the parameters of that particular group + for (param in group_params.keySet()) { + if (params.containsKey(param)) { + def params_value = params.get(param) + def schema_value = group_params.get(param).default + def param_type = group_params.get(param).type + if (schema_value != null) { + if (param_type == 'string') { + if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { + def sub_string = schema_value.replace('\$projectDir', '') + sub_string = sub_string.replace('\${projectDir}', '') + if (params_value.contains(sub_string)) { + schema_value = 
params_value + } + } + if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { + def sub_string = schema_value.replace('\$params.outdir', '') + sub_string = sub_string.replace('\${params.outdir}', '') + if ("${params.outdir}${sub_string}" == params_value) { + schema_value = params_value + } + } + } + } + + // We have a default in the schema, and this isn't it + if (schema_value != null && params_value != schema_value) { + sub_params.put(param, params_value) + } + // No default in the schema, and this isn't empty + else if (schema_value == null && params_value != "" && params_value != null && params_value != false) { + sub_params.put(param, params_value) + } + } + } + params_summary.put(group, sub_params) + } + return [ 'Core Nextflow options' : workflow_summary ] << params_summary + } + + // + // Beautify parameters for summary and return as string + // + public static String paramsSummaryLog(workflow, params) { + Map colors = NfcoreTemplate.logColours(params.monochrome_logs) + String output = '' + def params_map = paramsSummaryMap(workflow, params) + def max_chars = paramsMaxChars(params_map) + for (group in params_map.keySet()) { + def group_params = params_map.get(group) // This gets the parameters of that particular group + if (group_params) { + output += colors.bold + group + colors.reset + '\n' + for (param in group_params.keySet()) { + output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' + } + output += '\n' + } + } + output += "!! 
Only displaying parameters that differ from the pipeline defaults !!\n" + output += NfcoreTemplate.dashedLine(params.monochrome_logs) + return output + } + + // + // Loop over nested exceptions and print the causingException + // + private static void printExceptions(ex_json, params_json, log, enums, limit=5) { + def causingExceptions = ex_json['causingExceptions'] + if (causingExceptions.length() == 0) { + def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ + // Missing required param + if (m.matches()) { + log.error "* Missing required parameter: --${m[0][1]}" + } + // Other base-level error + else if (ex_json['pointerToViolation'] == '#') { + log.error "* ${ex_json['message']}" + } + // Error with specific param + else { + def param = ex_json['pointerToViolation'] - ~/^#\// + def param_val = params_json[param].toString() + if (enums.containsKey(param)) { + def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" + if (enums[param].size() > limit) { + log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" + } else { + log.error "${error_msg}: ${enums[param].join(', ')})" + } + } else { + log.error "* --${param}: ${ex_json['message']} (${param_val})" + } + } + } + for (ex in causingExceptions) { + printExceptions(ex, params_json, log, enums) + } + } + + // + // Remove an element from a JSONArray + // + private static JSONArray removeElement(json_array, element) { + def list = [] + int len = json_array.length() + for (int i=0;i + if(raw_schema.keySet().contains('definitions')){ + raw_schema.definitions.each { definition -> + for (key in definition.keySet()){ + if (definition[key].get("properties").keySet().contains(ignore_param)){ + // Remove the param to ignore + definition[key].get("properties").remove(ignore_param) + // If the param was required, change this + if (definition[key].has("required")) { + def cleaned_required = removeElement(definition[key].required, ignore_param) + definition[key].put("required", cleaned_required) + } + } + } + } + } + if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { + raw_schema.get("properties").remove(ignore_param) + } + if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { + def cleaned_required = removeElement(raw_schema.required, ignore_param) + raw_schema.put("required", cleaned_required) + } + } + return raw_schema + } + + // + // Clean and check parameters relative to Nextflow native classes + // + private static Map cleanParameters(params) { + def new_params = params.getClass().newInstance(params) + for (p in params) { + // remove anything evaluating to false + if (!p['value']) { + new_params.remove(p.key) + } + // Cast MemoryUnit to String + if (p['value'].getClass() == nextflow.util.MemoryUnit) { + new_params.replace(p.key, p['value'].toString()) + } + // Cast Duration to String + if (p['value'].getClass() == nextflow.util.Duration) { + new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) + } 
+ // Cast LinkedHashMap to String + if (p['value'].getClass() == LinkedHashMap) { + new_params.replace(p.key, p['value'].toString()) + } + } + return new_params + } + + // + // This function tries to read a JSON params file + // + private static LinkedHashMap paramsLoad(String json_schema) { + def params_map = new LinkedHashMap() + try { + params_map = paramsRead(json_schema) + } catch (Exception e) { + println "Could not read parameters settings from JSON. $e" + params_map = new LinkedHashMap() + } + return params_map + } + + // + // Method to actually read in JSON file using Groovy. + // Group (as Key), values are all parameters + // - Parameter1 as Key, Description as Value + // - Parameter2 as Key, Description as Value + // .... + // Group + // - + private static LinkedHashMap paramsRead(String json_schema) throws Exception { + def json = new File(json_schema).text + def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') + def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') + /* Tree looks like this in nf-core schema + * definitions <- this is what the first get('definitions') gets us + group 1 + title + description + properties + parameter 1 + type + description + parameter 2 + type + description + group 2 + title + description + properties + parameter 1 + type + description + * properties <- parameters can also be ungrouped, outside of definitions + parameter 1 + type + description + */ + + // Grouped params + def params_map = new LinkedHashMap() + schema_definitions.each { key, val -> + def Map group = schema_definitions."$key".properties // Gets the property object of the group + def title = schema_definitions."$key".title + def sub_params = new LinkedHashMap() + group.each { innerkey, value -> + sub_params.put(innerkey, value) + } + params_map.put(title, sub_params) + } + + // Ungrouped params + def ungrouped_params = new LinkedHashMap() + schema_properties.each { innerkey, value -> + 
ungrouped_params.put(innerkey, value)
+        }
+        params_map.put("Other parameters", ungrouped_params)
+
+        return params_map
+    }
+
+    //
+    // Get maximum number of characters across all parameter names
+    //
+    private static Integer paramsMaxChars(params_map) {
+        Integer max_chars = 0
+        for (group in params_map.keySet()) {
+            def group_params = params_map.get(group)  // This gets the parameters of that particular group
+            for (param in group_params.keySet()) {
+                if (param.size() > max_chars) {
+                    max_chars = param.size()
+                }
+            }
+        }
+        return max_chars
+    }
+}
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
new file mode 100755
index 000000000..2fc0a9b9b
--- /dev/null
+++ b/lib/NfcoreTemplate.groovy
@@ -0,0 +1,258 @@
+//
+// This file holds several functions used within the nf-core pipeline template.
+//
+
+import org.yaml.snakeyaml.Yaml
+
+class NfcoreTemplate {
+
+    //
+    // Check AWS Batch related parameters have been specified correctly (only when the 'awsbatch' profile is active)
+    //
+    public static void awsBatch(workflow, params) {
+        if (workflow.profile.contains('awsbatch')) {
+            // Check params.awsqueue and params.awsregion have been set if running on AWSBatch
+            assert (params.awsqueue && params.awsregion) : "Specify correct --awsqueue and --awsregion parameters on AWSBatch!"
+            // Check outdir paths to be S3 buckets if running on AWSBatch
+            assert params.outdir.startsWith('s3:') : "Outdir not on S3 - specify S3 Bucket to run on AWSBatch!"
+        }
+    }
+
+    //
+    // Warn if a -profile or Nextflow config has not been provided to run the pipeline
+    //
+    public static void checkConfigProvided(workflow, log) {
+        if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) {
+            log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" +
+                "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" +
+                " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" +
+                " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" +
+                " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" +
+                "Please refer to the quick start section and usage docs for the pipeline.\n "
+        }
+    }
+
+    //
+    // Construct and send completion email (sendmail first, 'mail' as fallback) and write the report to <outdir>/pipeline_info
+    //
+    public static void email(workflow, params, summary_params, projectDir, log, multiqc_report=[]) {
+
+        // Set up the e-mail variables
+        def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+        if (!workflow.success) {
+            subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+        }
+
+        def summary = [:]
+        for (group in summary_params.keySet()) {
+            summary << summary_params[group]
+        }
+
+        def misc_fields = [:]
+        misc_fields['Date Started']              = workflow.start
+        misc_fields['Date Completed']            = workflow.complete
+        misc_fields['Pipeline script file path'] = workflow.scriptFile
+        misc_fields['Pipeline script hash ID']   = workflow.scriptId
+        if (workflow.repository) misc_fields['Pipeline repository Git URL']    = workflow.repository
+        if (workflow.commitId)   misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+        if (workflow.revision)   misc_fields['Pipeline Git branch/tag']        = workflow.revision
+        misc_fields['Nextflow Version']           = workflow.nextflow.version
+        misc_fields['Nextflow Build']             = workflow.nextflow.build
+        misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+        def email_fields = [:]
+        email_fields['version']      = workflow.manifest.version
+        email_fields['runName']      = workflow.runName
+        email_fields['success']      = workflow.success
+        email_fields['dateComplete'] = workflow.complete
+        email_fields['duration']     = workflow.duration
+        email_fields['exitStatus']   = workflow.exitStatus
+        email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+        email_fields['errorReport']  = (workflow.errorReport ?: 'None')
+        email_fields['commandLine']  = workflow.commandLine
+        email_fields['projectDir']   = workflow.projectDir
+        email_fields['summary']      = summary << misc_fields
+
+        // On success try attach the multiqc report (mqc_report stays null on failure or if it cannot be read)
+        def mqc_report = null
+        try {
+            if (workflow.success) {
+                mqc_report = multiqc_report.getVal()
+                if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) {
+                    if (mqc_report.size() > 1) {
+                        log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+                    }
+                    mqc_report = mqc_report[0]
+                }
+            }
+        } catch (all) {
+            if (multiqc_report) {
+                log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
+            }
+        }
+
+        // Check if we are only sending emails on failure
+        def email_address = params.email
+        if (!params.email && params.email_on_fail && !workflow.success) {
+            email_address = params.email_on_fail
+        }
+
+        // Render the TXT template
+        def engine       = new groovy.text.GStringTemplateEngine()
+        def tf           = new File("$projectDir/assets/email_template.txt")
+        def txt_template = engine.createTemplate(tf).make(email_fields)
+        def email_txt    = txt_template.toString()
+
+        // Render the HTML template
+        def hf            = new File("$projectDir/assets/email_template.html")
+        def html_template = engine.createTemplate(hf).make(email_fields)
+        def email_html    = html_template.toString()
+
+        // Render the sendmail template
+        def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit
+        def smail_fields           = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
+        def sf                     = new File("$projectDir/assets/sendmail_template.txt")
+        def sendmail_template      = engine.createTemplate(sf).make(smail_fields)
+        def sendmail_html          = sendmail_template.toString()
+
+        // Send the HTML e-mail
+        Map colors = logColours(params.monochrome_logs)
+        if (email_address) {
+            try {
+                if (params.plaintext_email) { throw new GroovyException('Send plaintext e-mail, not HTML') }
+                // Try to send HTML e-mail using sendmail
+                [ 'sendmail', '-t' ].execute() << sendmail_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+            } catch (all) {
+                // Catch failures (or the plaintext request thrown above) and fall back to the 'mail' command
+                def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+                if ( mqc_report != null && mqc_report.size() <= max_multiqc_email_size.toBytes() ) {
+                    mail_cmd += [ '-A', mqc_report ]
+                }
+                mail_cmd.execute() << email_html
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+            }
+        }
+
+        // Write summary e-mail HTML to a file
+        def output_d = new File("${params.outdir}/pipeline_info/")
+        if (!output_d.exists()) {
+            output_d.mkdirs()
+        }
+        def output_hf = new File(output_d, "pipeline_report.html")
+        output_hf.withWriter { w -> w << email_html }
+        def output_tf = new File(output_d, "pipeline_report.txt")
+        output_tf.withWriter { w -> w << email_txt }
+    }
+
+    //
+    // Print pipeline summary on completion
+    //
+    public static void summary(workflow, params, log) {
+        Map colors = logColours(params.monochrome_logs)
+        if (workflow.success) {
+            if (workflow.stats.ignoredCount == 0) {
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
+            } else {
+                log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+            }
+        } else {
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
+        }
+    }
+
+    //
+    // ANSI colour codes used for terminal logging (every value is an empty string when monochrome_logs is set)
+    //
+    public static Map logColours(Boolean monochrome_logs) {
+        Map colorcodes = [:]
+
+        // Reset / Meta
+        colorcodes['reset']      = monochrome_logs ? '' : "\033[0m"
+        colorcodes['bold']       = monochrome_logs ? '' : "\033[1m"
+        colorcodes['dim']        = monochrome_logs ? '' : "\033[2m"
+        colorcodes['underlined'] = monochrome_logs ? '' : "\033[4m"
+        colorcodes['blink']      = monochrome_logs ? '' : "\033[5m"
+        colorcodes['reverse']    = monochrome_logs ? '' : "\033[7m"
+        colorcodes['hidden']     = monochrome_logs ? '' : "\033[8m"
+
+        // Regular Colors
+        colorcodes['black']      = monochrome_logs ? '' : "\033[0;30m"
+        colorcodes['red']        = monochrome_logs ? '' : "\033[0;31m"
+        colorcodes['green']      = monochrome_logs ? '' : "\033[0;32m"
+        colorcodes['yellow']     = monochrome_logs ? '' : "\033[0;33m"
+        colorcodes['blue']       = monochrome_logs ? '' : "\033[0;34m"
+        colorcodes['purple']     = monochrome_logs ? '' : "\033[0;35m"
+        colorcodes['cyan']       = monochrome_logs ? '' : "\033[0;36m"
+        colorcodes['white']      = monochrome_logs ? '' : "\033[0;37m"
+
+        // Bold
+        colorcodes['bblack']     = monochrome_logs ? '' : "\033[1;30m"
+        colorcodes['bred']       = monochrome_logs ? '' : "\033[1;31m"
+        colorcodes['bgreen']     = monochrome_logs ? '' : "\033[1;32m"
+        colorcodes['byellow']    = monochrome_logs ? '' : "\033[1;33m"
+        colorcodes['bblue']      = monochrome_logs ? '' : "\033[1;34m"
+        colorcodes['bpurple']    = monochrome_logs ? '' : "\033[1;35m"
+        colorcodes['bcyan']      = monochrome_logs ? '' : "\033[1;36m"
+        colorcodes['bwhite']     = monochrome_logs ? '' : "\033[1;37m"
+
+        // Underline
+        colorcodes['ublack']     = monochrome_logs ? '' : "\033[4;30m"
+        colorcodes['ured']       = monochrome_logs ? '' : "\033[4;31m"
+        colorcodes['ugreen']     = monochrome_logs ? '' : "\033[4;32m"
+        colorcodes['uyellow']    = monochrome_logs ? '' : "\033[4;33m"
+        colorcodes['ublue']      = monochrome_logs ? '' : "\033[4;34m"
+        colorcodes['upurple']    = monochrome_logs ? '' : "\033[4;35m"
+        colorcodes['ucyan']      = monochrome_logs ? '' : "\033[4;36m"
+        colorcodes['uwhite']     = monochrome_logs ? '' : "\033[4;37m"
+
+        // High Intensity
+        colorcodes['iblack']     = monochrome_logs ? '' : "\033[0;90m"
+        colorcodes['ired']       = monochrome_logs ? '' : "\033[0;91m"
+        colorcodes['igreen']     = monochrome_logs ? '' : "\033[0;92m"
+        colorcodes['iyellow']    = monochrome_logs ? '' : "\033[0;93m"
+        colorcodes['iblue']      = monochrome_logs ? '' : "\033[0;94m"
+        colorcodes['ipurple']    = monochrome_logs ? '' : "\033[0;95m"
+        colorcodes['icyan']      = monochrome_logs ? '' : "\033[0;96m"
+        colorcodes['iwhite']     = monochrome_logs ? '' : "\033[0;97m"
+
+        // Bold High Intensity
+        colorcodes['biblack']    = monochrome_logs ? '' : "\033[1;90m"
+        colorcodes['bired']      = monochrome_logs ? '' : "\033[1;91m"
+        colorcodes['bigreen']    = monochrome_logs ? '' : "\033[1;92m"
+        colorcodes['biyellow']   = monochrome_logs ? '' : "\033[1;93m"
+        colorcodes['biblue']     = monochrome_logs ? '' : "\033[1;94m"
+        colorcodes['bipurple']   = monochrome_logs ? '' : "\033[1;95m"
+        colorcodes['bicyan']     = monochrome_logs ? '' : "\033[1;96m"
+        colorcodes['biwhite']    = monochrome_logs ? '' : "\033[1;97m"
+
+        return colorcodes
+    }
+
+    //
+    // Does what it says on the tin: returns a dashed separator line (dimmed unless monochrome_logs is set)
+    //
+    public static String dashedLine(monochrome_logs) {
+        Map colors = logColours(monochrome_logs)
+        return "-${colors.dim}----------------------------------------------------${colors.reset}-"
+    }
+
+    //
+    // nf-core logo, followed by the pipeline name and version, framed by dashed lines
+    //
+    public static String logo(workflow, monochrome_logs) {
+        Map colors = logColours(monochrome_logs)
+        String.format(
+            """\n
+            ${dashedLine(monochrome_logs)}
+                                                    ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
+            ${colors.blue}        ___     __   __   __   ___     ${colors.green}/,-._.--~\'${colors.reset}
+            ${colors.blue}  |\\ | |__  __ /  ` /  \\ |__) |__         ${colors.yellow}}  {${colors.reset}
+            ${colors.blue}  | \\| |       \\__, \\__/ |  \\ |___     ${colors.green}\\`-._,-`-,${colors.reset}
+                                                    ${colors.green}`._,._,\'${colors.reset}
+            ${colors.purple}  ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset}
+            ${dashedLine(monochrome_logs)}
+            """.stripIndent()
+        )
+    }
+}
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
new file mode 100755
index 000000000..28567bd70
--- /dev/null
+++ b/lib/Utils.groovy
@@ -0,0 +1,40 @@
+//
+// This file holds several Groovy functions that could be useful for any Nextflow pipeline
+//
+
+import org.yaml.snakeyaml.Yaml
+
+class Utils {
+
+    //
+    // When running with -profile conda, warn if channels have not been set-up appropriately
+    //
+    public static void checkCondaChannels(log) {
+        Yaml parser = new Yaml()
+        def channels = []
+        try {
+            def config = parser.load("conda config --show channels".execute().text)
+            channels = config.channels
+        } catch(NullPointerException | IOException e) {
+            log.warn "Could not verify conda channel configuration."
+            return
+        }
+
+        // Check that all channels are present
+        def required_channels = ['conda-forge', 'bioconda', 'defaults']
+        def conda_check_failed = !required_channels.every { ch -> ch in channels }
+
+        // Check that they are in the right order
+        conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
+        conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+
+        if (conda_check_failed) {
+            log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+                " There is a problem with your Conda configuration!\n\n" +
+                " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
+                " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
+                " NB: The order of the channels matters!\n" +
+                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+        }
+    }
+}
diff --git a/lib/WorkflowChipseq.groovy b/lib/WorkflowChipseq.groovy
new file mode 100755
index 000000000..dfae806fc
--- /dev/null
+++ b/lib/WorkflowChipseq.groovy
@@ -0,0 +1,105 @@
+//
+// This file holds several functions specific to the workflow/chipseq.nf in the nf-core/chipseq pipeline
+//
+
+class WorkflowChipseq {
+
+    //
+    // Check and validate parameters (logs an error and exits with status 1 on the first invalid setting)
+    //
+    public static void initialise(params, log, valid_params) {
+        genomeExistsError(params, log)
+
+
+        if (!params.fasta) {
+            log.error "Genome fasta file not specified with e.g. '--fasta' or via a detectable config file."
+            System.exit(1)
+        }
+
+        if (!params.gtf && !params.gff) {
+            log.error "No GTF or GFF3 annotation specified! The pipeline requires at least one of these files."
+            System.exit(1)
+        }
+
+        if (params.gtf && params.gff) {
+            gtfGffWarn(log)
+        }
+
+        if (!params.macs_gsize) {
+            macsGsizeWarn(log)
+        }
+
+        if (!params.read_length && !params.macs_gsize) {
+            log.error "Both '--read_length' and '--macs_gsize' not specified! Please specify either to infer MACS2 genome size for peak calling."
+            System.exit(1)
+        }
+
+        if (params.aligner) {
+            if (!valid_params['aligners'].contains(params.aligner)) {
+                log.error "Invalid option: '${params.aligner}'. Valid options for '--aligner': ${valid_params['aligners'].join(', ')}."
+                System.exit(1)
+            }
+        }
+    }
+
+    //
+    // Get workflow summary for MultiQC (YAML text whose 'data' block holds one HTML definition list per parameter group)
+    //
+    public static String paramsSummaryMultiqc(workflow, summary) {
+        String summary_section = ''
+        for (group in summary.keySet()) {
+            def group_params = summary.get(group)  // This gets the parameters of that particular group
+            if (group_params) {
+                summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n"
+                summary_section += "    <dl class=\"dl-horizontal\">\n"
+                for (param in group_params.keySet()) {
+                    summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: 'N/A'}</samp></dd>\n"
+                }
+                summary_section += "    </dl>\n"
+            }
+        }
+
+        String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
+        yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
+        yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
+        yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
+        yaml_file_text += "plot_type: 'html'\n"
+        yaml_file_text += "data: |\n"  // block scalar: the indented HTML lines of summary_section follow
+        yaml_file_text += "${summary_section}"
+        return yaml_file_text
+    }//
+    // Exit pipeline if incorrect --genome key provided
+    //
+    private static void genomeExistsError(params, log) {
+        if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
+            log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+                " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
+                " Currently, the available genome keys are:\n" +
+                " ${params.genomes.keySet().join(", ")}\n" +
+                "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+            System.exit(1)
+        }
+    }
+
+    //
+    // Print a warning if both GTF and GFF have been provided
+    //
+    private static void gtfGffWarn(log) {
+        log.warn "=============================================================================\n" +
+            " Both '--gtf' and '--gff' parameters have been provided.\n" +
+            " Using GTF file as priority.\n" +
+            "==================================================================================="
+    }
+
+    //
+    // Print a warning if macs_gsize parameter has not been provided
+    //
+    private static void macsGsizeWarn(log) {
+        log.warn "=============================================================================\n" +
+            " --macs_gsize parameter has not been provided.\n" +
+            " It will be auto-calculated by 'khmer unique-kmers.py' using the '--read_length' parameter.\n" +
+            " Explicitly provide '--macs_gsize' to change this behaviour.\n" +
+            "==================================================================================="
+    }
+
+}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
new file mode 100755
index 000000000..9eaea561c
--- /dev/null
+++ b/lib/WorkflowMain.groovy
@@ -0,0 +1,107 @@
+//
+// This file holds several functions specific to the main.nf workflow in the nf-core/chipseq pipeline
+//
+
+class WorkflowMain {
+
+    //
+    // Citation string for pipeline
+    //
+    public static String citation(workflow) {
+        return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
+            "* The pipeline\n" +
+            " https://doi.org/10.5281/zenodo.3240506\n\n" +
+            "* The nf-core framework\n" +
+            " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+            "* Software dependencies\n" +
+            " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+    }
+
+    //
+    // Build the help text: logo, schema-derived parameter help, citation and a closing dashed line
+    //
+    public static String help(workflow, params, log) {
+        def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir <OUTDIR> --genome GRCh37 -profile docker"
+        def help_string = ''
+        help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs)
+        help_string += NfcoreSchema.paramsHelp(workflow, params, command)
+        help_string += '\n' + citation(workflow) + '\n'
+        help_string += NfcoreTemplate.dashedLine(params.monochrome_logs)
+        return help_string
+    }
+
+    //
+    // Build the parameter summary log: logo, parameter summary, citation and a closing dashed line
+    //
+    public static String paramsSummaryLog(workflow, params, log) {
+        def summary_log = ''
+        summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs)
+        summary_log += NfcoreSchema.paramsSummaryLog(workflow, params)
+        summary_log += '\n' + citation(workflow) + '\n'
+        summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs)
+        return summary_log
+    }
+
+    //
+    // Validate parameters and print summary to screen
+    //
+    public static void initialise(workflow, params, log) {
+        // Print help to screen if required
+        if (params.help) {
+            log.info help(workflow, params, log)
+            System.exit(0)
+        }
+
+        // Validate workflow parameters via the JSON schema
+        if (params.validate_params) {
+            NfcoreSchema.validateParameters(workflow, params, log)
+        }
+
+        // Print parameter summary log to screen
+
+        log.info paramsSummaryLog(workflow, params, log)
+
+        // Check that a -profile or Nextflow config has been provided to run the pipeline
+        NfcoreTemplate.checkConfigProvided(workflow, log)
+
+        // Check that conda channels are set-up correctly
+        if (params.enable_conda) {
+            Utils.checkCondaChannels(log)
+        }
+
+        // Check AWS batch settings
+        NfcoreTemplate.awsBatch(workflow, params)
+
+        // Check input has been provided
+        if (!params.input) {
+            log.error "Please provide an input samplesheet to the pipeline e.g. '--input samplesheet.csv'"
+            System.exit(1)
+        }
+    }
+    //
+    // Get attribute from genome config file e.g. fasta (null when the genome or the attribute is not configured)
+    //
+    public static Object getGenomeAttribute(params, attribute) {
+        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
+            if (params.genomes[ params.genome ].containsKey(attribute)) {
+                return params.genomes[ params.genome ][ attribute ]
+            }
+        }
+        return null
+    }
+
+    //
+    // Get macs genome size (macs_gsize) for the configured genome, keyed by read length; null when no entry matches
+    //
+    public static Long getMacsGsize(params) {
+        def val = null
+        if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
+            if (params.genomes[ params.genome ].containsKey('macs_gsize')) {
+                if (params.genomes[ params.genome ][ 'macs_gsize' ].containsKey(params.read_length.toString())) {
+                    val = params.genomes[ params.genome ][ 'macs_gsize' ][ params.read_length.toString() ]
+                }
+            }
+        }
+        return val
+    }
+}
diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar
new file mode 100644
index 000000000..805c8bb5e
Binary files /dev/null and b/lib/nfcore_external_java_deps.jar differ
diff --git a/main.nf b/main.nf
index fa9c455ac..9baf7cb86 100755
--- a/main.nf
+++ b/main.nf
@@ -1,1770 +1,72 @@
 #!/usr/bin/env nextflow
 /*
-======================================================================================== - nf-core/chipseq -======================================================================================== - nf-core/chipseq Analysis Pipeline. - #### Homepage / Documentation - https://github.com/nf-core/chipseq +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/chipseq +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Github : https://github.com/nf-core/chipseq + Website: https://nf-co.re/chipseq + Slack : https://nfcore.slack.com/channels/chipseq ---------------------------------------------------------------------------------------- */ -def helpMessage() { - log.info nfcoreHeader() - log.info""" - Usage: - - The typical command for running the pipeline is as follows: - - nextflow run nf-core/chipseq --input design.csv --genome GRCh37 -profile docker - - Mandatory arguments: - --input [file] Comma-separated file containing information about the samples in the experiment (see docs/usage.md) (Default: './design.csv') - --fasta [file] Path to Fasta reference. Not mandatory when using reference in iGenomes config via --genome (Default: false) - --gtf [file] Path to GTF file. Not mandatory when using reference in iGenomes config via --genome (Default: false) - -profile [str] Configuration profile to use. 
Can use multiple (comma separated) - Available: conda, docker, singularity, awsbatch, test - - Generic - --single_end [bool] Specifies that the input is single-end reads (Default: false) - --seq_center [str] Sequencing center information to be added to read group of BAM files (Default: false) - --fragment_size [int] Estimated fragment size used to extend single-end reads (Default: 200) - --fingerprint_bins [int] Number of genomic bins to use when calculating fingerprint plot (Default: 500000) - - References If not specified in the configuration file or you wish to overwrite any of the references - --genome [str] Name of iGenomes reference (Default: false) - --bwa_index [file] Full path to directory containing BWA index including base name i.e. /path/to/index/genome.fa (Default: false) - --gene_bed [file] Path to BED file containing gene intervals (Default: false) - --macs_gsize [str] Effective genome size parameter required by MACS2. If using iGenomes config, values have only been provided when --genome is set as GRCh37, GRCm38, hg19, mm10, BDGP6 and WBcel235 (Default: false) - --blacklist [file] Path to blacklist regions (.BED format), used for filtering alignments (Default: false) - --save_reference [bool] If generated by the pipeline save the BWA index in the results directory (Default: false) - - Trimming - --clip_r1 [int] Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads) (Default: 0) - --clip_r2 [int] Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only) (Default: 0) - --three_prime_clip_r1 [int] Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed (Default: 0) - --three_prime_clip_r2 [int] Instructs Trim Galore to re move bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed (Default: 0) - --trim_nextseq [int] Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G 
tails (Default: 0) - --skip_trimming [bool] Skip the adapter trimming step (Default: false) - --save_trimmed [bool] Save the trimmed FastQ files in the results directory (Default: false) - - Alignments - --bwa_min_score [int] Don’t output BWA MEM alignments with score lower than this parameter (Default: false) - --keep_dups [bool] Duplicate reads are not filtered from alignments (Default: false) - --keep_multi_map [bool] Reads mapping to multiple locations are not filtered from alignments (Default: false) - --save_align_intermeds [bool] Save the intermediate BAM files from the alignment step - not done by default (Default: false) - - Peaks - --narrow_peak [bool] Run MACS2 in narrowPeak mode (Default: false) - --broad_cutoff [float] Specifies broad cutoff value for MACS2. Only used when --narrow_peak isnt specified (Default: 0.1) - --macs_fdr [float] Minimum FDR (q-value) cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false) - --macs_pvalue [float] p-value cutoff for peak detection, --macs_fdr and --macs_pvalue are mutually exclusive (Default: false) - --min_reps_consensus [int] Number of biological replicates required from a given condition for a peak to contribute to a consensus peak (Default: 1) - --save_macs_pileup [bool] Instruct MACS2 to create bedGraph files normalised to signal per million reads (Default: false) - --skip_peak_qc [bool] Skip MACS2 peak QC plot generation (Default: false) - --skip_peak_annotation [bool] Skip annotation of MACS2 and consensus peaks with HOMER (Default: false) - --skip_consensus_peaks [bool] Skip consensus peak generation (Default: false) - - Differential analysis - --deseq2_vst [bool] Use vst transformation instead of rlog with DESeq2 (Default: false) - --skip_diff_analysis [bool] Skip differential accessibility analysis (Default: false) - - QC - --skip_fastqc [bool] Skip FastQC (Default: false) - --skip_picard_metrics [bool] Skip Picard CollectMultipleMetrics (Default: false) - 
--skip_preseq [bool] Skip Preseq (Default: false) - --skip_plot_profile [bool] Skip deepTools plotProfile (Default: false) - --skip_plot_fingerprint [bool] Skip deepTools plotFingerprint (Default: false) - --skip_spp [bool] Skip Phantompeakqualtools (Default: false) - --skip_igv [bool] Skip IGV (Default: false) - --skip_multiqc [bool] Skip MultiQC (Default: false) - - Other - --outdir [file] The output directory where the results will be saved (Default: './results') - --publish_dir_mode [str] Mode for publishing results in the output directory. Available: symlink, rellink, link, copy, copyNoFollow, move (Default: copy) - --email [email] Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits (Default: false) - --email_on_fail [email] Same as --email, except only send mail if the workflow is not successful (Default: false) - --max_multiqc_email_size [str] Threshold size for MultiQC report to be attached in notification email. If file generated by pipeline exceeds the threshold, it will not be attached (Default: 25MB) - -name [str] Name for the pipeline run. If not specified, Nextflow will automatically generate a random mnemonic (Default: false) - - AWSBatch options: - --awsqueue [str] The AWSBatch JobQueue that needs to be set when running on AWSBatch - --awsregion [str] The AWS Region for your AWS Batch job to run on - --awscli [str] Path to the AWS CLI tool - """.stripIndent() -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- SET UP CONFIGURATION VARIABLES -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -// Show help message -if (params.help) { - helpMessage() - exit 0 -} - -// Has the run name been specified by the user? 
-// this has the bonus effect of catching both -name and --name -custom_runName = params.name -if (!(workflow.runName ==~ /[a-z]+_[a-z]+/)) { - custom_runName = workflow.runName -} - -//////////////////////////////////////////////////// -/* -- DEFAULT PARAMETER VALUES -- */ -//////////////////////////////////////////////////// - -// Check if genome exists in the config file -if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { - exit 1, "The provided genome '${params.genome}' is not available in the iGenomes file. Currently the available genomes are ${params.genomes.keySet().join(", ")}" -} - -// Configurable variables -params.fasta = params.genome ? params.genomes[ params.genome ].fasta ?: false : false -params.bwa_index = params.genome ? params.genomes[ params.genome ].bwa ?: false : false -params.gtf = params.genome ? params.genomes[ params.genome ].gtf ?: false : false -params.gene_bed = params.genome ? params.genomes[ params.genome ].bed12 ?: false : false -params.macs_gsize = params.genome ? params.genomes[ params.genome ].macs_gsize ?: false : false -params.blacklist = params.genome ? params.genomes[ params.genome ].blacklist ?: false : false -params.anno_readme = params.genome ? params.genomes[ params.genome ].readme ?: false : false - -// Global variables -def PEAK_TYPE = params.narrow_peak ? 'narrowPeak' : 'broadPeak' - -//////////////////////////////////////////////////// -/* -- CONFIG FILES -- */ -//////////////////////////////////////////////////// - -// Pipeline config -ch_multiqc_config = file("$baseDir/assets/multiqc_config.yaml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? 
Channel.fromPath(params.multiqc_config, checkIfExists: true) : Channel.empty() -ch_output_docs = file("$baseDir/docs/output.md", checkIfExists: true) -ch_output_docs_images = file("$baseDir/docs/images/", checkIfExists: true) - -// JSON files required by BAMTools for alignment filtering -if (params.single_end) { - ch_bamtools_filter_config = file(params.bamtools_filter_se_config, checkIfExists: true) -} else { - ch_bamtools_filter_config = file(params.bamtools_filter_pe_config, checkIfExists: true) -} - -// Header files for MultiQC -ch_peak_count_header = file("$baseDir/assets/multiqc/peak_count_header.txt", checkIfExists: true) -ch_frip_score_header = file("$baseDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) -ch_peak_annotation_header = file("$baseDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true) -ch_deseq2_pca_header = file("$baseDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) -ch_deseq2_clustering_header = file("$baseDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) -ch_spp_correlation_header = file("$baseDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true) -ch_spp_nsc_header = file("$baseDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true) -ch_spp_rsc_header = file("$baseDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true) - -//////////////////////////////////////////////////// -/* -- VALIDATE INPUTS -- */ -//////////////////////////////////////////////////// - -if (params.input) { ch_input = file(params.input, checkIfExists: true) } else { exit 1, 'Samples design file not specified!' } -if (params.gtf) { ch_gtf = file(params.gtf, checkIfExists: true) } else { exit 1, 'GTF annotation file not specified!' 
} -if (params.gene_bed) { ch_gene_bed = file(params.gene_bed, checkIfExists: true) } -if (params.blacklist) { ch_blacklist = Channel.fromPath(params.blacklist, checkIfExists: true) } else { ch_blacklist = Channel.empty() } - -if (params.fasta) { - lastPath = params.fasta.lastIndexOf(File.separator) - bwa_base = params.fasta.substring(lastPath+1) - ch_fasta = file(params.fasta, checkIfExists: true) -} else { - exit 1, 'Fasta file not specified!' -} - -if (params.bwa_index) { - lastPath = params.bwa_index.lastIndexOf(File.separator) - bwa_dir = params.bwa_index.substring(0,lastPath+1) - bwa_base = params.bwa_index.substring(lastPath+1) - Channel - .fromPath(bwa_dir, checkIfExists: true) - .set { ch_bwa_index } -} - -// Save AWS IGenomes file containing annotation version -if (params.anno_readme && file(params.anno_readme).exists()) { - file("${params.outdir}/genome/").mkdirs() - file(params.anno_readme).copyTo("${params.outdir}/genome/") -} - -//////////////////////////////////////////////////// -/* -- AWS -- */ -//////////////////////////////////////////////////// - -if (workflow.profile.contains('awsbatch')) { - // AWSBatch sanity checking - if (!params.awsqueue || !params.awsregion) exit 1, 'Specify correct --awsqueue and --awsregion parameters on AWSBatch!' - // Check outdir paths to be S3 buckets if running on AWSBatch - // related: https://github.com/nextflow-io/nextflow/issues/813 - if (!params.outdir.startsWith('s3:')) exit 1, 'Outdir not on S3 - specify S3 Bucket to run on AWSBatch!' - // Prevent trace files to be stored on S3 since S3 does not support rolling files. - if (params.tracedir.startsWith('s3:')) exit 1, 'Specify a local tracedir or run without trace! S3 cannot be used for tracefiles.' 
-} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- HEADER LOG INFO -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -// Header log info -log.info nfcoreHeader() -def summary = [:] -summary['Run Name'] = custom_runName ?: workflow.runName -summary['Data Type'] = params.single_end ? 'Single-End' : 'Paired-End' -summary['Design File'] = params.input -summary['Genome'] = params.genome ?: 'Not supplied' -summary['Fasta File'] = params.fasta -summary['GTF File'] = params.gtf -if (params.gene_bed) summary['Gene BED File'] = params.gene_bed -if (params.bwa_index) summary['BWA Index'] = params.bwa_index -if (params.blacklist) summary['Blacklist BED'] = params.blacklist -if (params.bwa_min_score) summary['BWA Min Score'] = params.bwa_min_score -summary['MACS2 Genome Size'] = params.macs_gsize ?: 'Not supplied' -summary['Min Consensus Reps'] = params.min_reps_consensus -if (params.macs_gsize) summary['MACS2 Narrow Peaks'] = params.narrow_peak ? 
'Yes' : 'No' -if (!params.narrow_peak) summary['MACS2 Broad Cutoff'] = params.broad_cutoff -if (params.macs_fdr) summary['MACS2 FDR'] = params.macs_fdr -if (params.macs_pvalue) summary['MACS2 P-value'] = params.macs_pvalue -if (params.skip_trimming) { - summary['Trimming Step'] = 'Skipped' -} else { - summary['Trim R1'] = "$params.clip_r1 bp" - summary['Trim R2'] = "$params.clip_r2 bp" - summary["Trim 3' R1"] = "$params.three_prime_clip_r1 bp" - summary["Trim 3' R2"] = "$params.three_prime_clip_r2 bp" - summary['NextSeq Trim'] = "$params.trim_nextseq bp" -} -if (params.seq_center) summary['Sequencing Center'] = params.seq_center -if (params.single_end) summary['Fragment Size'] = "$params.fragment_size bp" -summary['Fingerprint Bins'] = params.fingerprint_bins -if (params.keep_dups) summary['Keep Duplicates'] = 'Yes' -if (params.keep_multi_map) summary['Keep Multi-mapped'] = 'Yes' -summary['Save Genome Index'] = params.save_reference ? 'Yes' : 'No' -if (params.save_trimmed) summary['Save Trimmed'] = 'Yes' -if (params.save_align_intermeds) summary['Save Intermeds'] = 'Yes' -if (params.save_macs_pileup) summary['Save MACS2 Pileup'] = 'Yes' -if (params.skip_peak_qc) summary['Skip MACS2 Peak QC'] = 'Yes' -if (params.skip_peak_annotation) summary['Skip Peak Annotation'] = 'Yes' -if (params.skip_consensus_peaks) summary['Skip Consensus Peaks'] = 'Yes' -if (params.deseq2_vst) summary['Use DESeq2 vst Transform'] = 'Yes' -if (params.skip_diff_analysis) summary['Skip Differential Analysis'] = 'Yes' -if (params.skip_fastqc) summary['Skip FastQC'] = 'Yes' -if (params.skip_picard_metrics) summary['Skip Picard Metrics'] = 'Yes' -if (params.skip_preseq) summary['Skip Preseq'] = 'Yes' -if (params.skip_plot_profile) summary['Skip plotProfile'] = 'Yes' -if (params.skip_plot_fingerprint) summary['Skip plotFingerprint'] = 'Yes' -if (params.skip_spp) summary['Skip spp'] = 'Yes' -if (params.skip_igv) summary['Skip IGV'] = 'Yes' -if (params.skip_multiqc) summary['Skip MultiQC'] = 'Yes' 
-summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" -if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" -summary['Output Dir'] = params.outdir -summary['Launch Dir'] = workflow.launchDir -summary['Working Dir'] = workflow.workDir -summary['Script Dir'] = workflow.projectDir -summary['User'] = workflow.userName -if (workflow.profile.contains('awsbatch')) { - summary['AWS Region'] = params.awsregion - summary['AWS Queue'] = params.awsqueue - summary['AWS CLI'] = params.awscli -} -summary['Config Profile'] = workflow.profile -if (params.config_profile_description) summary['Config Description'] = params.config_profile_description -if (params.config_profile_contact) summary['Config Contact'] = params.config_profile_contact -if (params.config_profile_url) summary['Config URL'] = params.config_profile_url -if (params.email || params.email_on_fail) { - summary['E-mail Address'] = params.email - summary['E-mail on failure'] = params.email_on_fail - summary['MultiQC Max Size'] = params.max_multiqc_email_size -} -log.info summary.collect { k,v -> "${k.padRight(20)}: $v" }.join('\n') -log.info "-\033[2m--------------------------------------------------\033[0m-" - -// Check the hostnames against configured profiles -checkHostname() - -// Show a big warning message if we're not running MACS -if (!params.macs_gsize) { - def warnstring = params.genome ? "supported for '${params.genome}'" : 'supplied' - log.warn "=================================================================\n" + - " WARNING! 
MACS genome size parameter not $warnstring.\n" + - " Peak calling, annotation and differential analysis will be skipped.\n" + - " Please specify value for '--macs_gsize' to run these steps.\n" + - "=======================================================================" -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- PARSE DESIGN FILE -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * PREPROCESSING: Reformat design file, check validitiy and create IP vs control mappings - */ -process CHECK_DESIGN { - tag "$design" - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode - - input: - path design from ch_input - - output: - path 'design_reads.csv' into ch_design_reads_csv - path 'design_controls.csv' into ch_design_controls_csv - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - """ - check_design.py $design design_reads.csv design_controls.csv - """ -} - -/* - * Create channels for input fastq files - */ -if (params.single_end) { - ch_design_reads_csv - .splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } -} else { - ch_design_reads_csv - .splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, [ file(row.fastq_1, checkIfExists: true), file(row.fastq_2, checkIfExists: true) ] ] } - .into { ch_raw_reads_fastqc; - ch_raw_reads_trimgalore } -} - -/* - * Create a channel with [sample_id, control id, antibody, replicatesExist, multipleGroups] - */ -ch_design_controls_csv - .splitCsv(header:true, sep:',') - .map { row -> [ row.sample_id, row.control_id, row.antibody, row.replicatesExist.toBoolean(), 
row.multipleGroups.toBoolean() ] } - .set { ch_design_controls_csv } - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- PREPARE ANNOTATION FILES -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * PREPROCESSING: Build BWA index - */ -if (!params.bwa_index) { - process BWA_INDEX { - tag "$fasta" - label 'process_high' - publishDir path: { params.save_reference ? "${params.outdir}/genome" : params.outdir }, - saveAs: { params.save_reference ? it : null }, mode: params.publish_dir_mode - - input: - path fasta from ch_fasta - - output: - path 'BWAIndex' into ch_bwa_index - - script: - """ - bwa index -a bwtsw $fasta - mkdir BWAIndex && mv ${fasta}* BWAIndex - """ - } -} - -/* - * PREPROCESSING: Generate gene BED file - */ -// If --gtf is supplied along with --genome -// Make gene bed from supplied --gtf instead of using iGenomes one automatically -def MAKE_BED = false -if (!params.gene_bed) { - MAKE_BED = true -} else if (params.genome && params.gtf) { - if (params.genomes[ params.genome ].gtf != params.gtf) { - MAKE_BED = true - } -} -if (MAKE_BED) { - process MAKE_GENE_BED { - tag "$gtf" - label 'process_low' - publishDir "${params.outdir}/genome", mode: params.publish_dir_mode - - input: - path gtf from ch_gtf - - output: - path '*.bed' into ch_gene_bed - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - """ - gtf2bed $gtf > ${gtf.baseName}.bed - """ - } -} - -/* - * PREPROCESSING: Prepare genome intervals for filtering - */ -process MAKE_GENOME_FILTER { - tag "$fasta" - publishDir "${params.outdir}/genome", mode: params.publish_dir_mode - - input: - path fasta from ch_fasta - path blacklist from ch_blacklist.ifEmpty([]) - - output: - path "$fasta" // FASTA FILE FOR 
IGV - path '*.fai' // FAI INDEX FOR REFERENCE GENOME - path '*.bed' into ch_genome_filter_regions // BED FILE WITHOUT BLACKLIST REGIONS - path '*.sizes' into ch_genome_sizes_bigwig // CHROMOSOME SIZES FILE FOR BEDTOOLS - - script: - blacklist_filter = params.blacklist ? "sortBed -i $blacklist -g ${fasta}.sizes | complementBed -i stdin -g ${fasta}.sizes" : "awk '{print \$1, '0' , \$2}' OFS='\t' ${fasta}.sizes" - """ - samtools faidx $fasta - cut -f 1,2 ${fasta}.fai > ${fasta}.sizes - $blacklist_filter > ${fasta}.include_regions.bed - """ -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- FASTQ QC -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 1: FastQC - */ -process FASTQC { - tag "$name" - label 'process_medium' - publishDir "${params.outdir}/fastqc", mode: params.publish_dir_mode, - saveAs: { filename -> - filename.endsWith('.zip') ? "zips/$filename" : filename - } - - when: - !params.skip_fastqc - - input: - tuple val(name), path(reads) from ch_raw_reads_fastqc - - output: - path '*.{zip,html}' into ch_fastqc_reports_mqc - - script: - // Added soft-links to original fastqs for consistent naming in MultiQC - if (params.single_end) { - """ - [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - fastqc -q -t $task.cpus ${name}.fastq.gz - """ - } else { - """ - [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz - [ ! 
-f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - fastqc -q -t $task.cpus ${name}_1.fastq.gz - fastqc -q -t $task.cpus ${name}_2.fastq.gz - """ - } -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- ADAPTER TRIMMING -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 2: Trim Galore! - */ -if (params.skip_trimming) { - ch_trimmed_reads = ch_raw_reads_trimgalore - ch_trimgalore_results_mqc = Channel.empty() - ch_trimgalore_fastqc_reports_mqc = Channel.empty() -} else { - process TRIMGALORE { - tag "$name" - label 'process_high' - publishDir "${params.outdir}/trim_galore", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.html')) "fastqc/$filename" - else if (filename.endsWith('.zip')) "fastqc/zips/$filename" - else if (filename.endsWith('trimming_report.txt')) "logs/$filename" - else params.save_trimmed ? filename : null - } - - input: - tuple val(name), path(reads) from ch_raw_reads_trimgalore - - output: - tuple val(name), path('*.fq.gz') into ch_trimmed_reads - path '*.txt' into ch_trimgalore_results_mqc - path '*.{zip,html}' into ch_trimgalore_fastqc_reports_mqc - - script: - // Calculate number of --cores for TrimGalore based on value of task.cpus - // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 - // See: https://github.com/nf-core/atacseq/pull/65 - def cores = 1 - if (task.cpus) { - cores = (task.cpus as int) - 4 - if (params.single_end) cores = (task.cpus as int) - 3 - if (cores < 1) cores = 1 - if (cores > 4) cores = 4 - } - - c_r1 = params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' - c_r2 = params.clip_r2 > 0 ? 
"--clip_r2 ${params.clip_r2}" : '' - tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' - tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' - nextseq = params.trim_nextseq > 0 ? "--nextseq ${params.trim_nextseq}" : '' - - // Added soft-links to original fastqs for consistent naming in MultiQC - if (params.single_end) { - """ - [ ! -f ${name}.fastq.gz ] && ln -s $reads ${name}.fastq.gz - trim_galore --cores $cores --fastqc --gzip $c_r1 $tpc_r1 $nextseq ${name}.fastq.gz - """ - } else { - """ - [ ! -f ${name}_1.fastq.gz ] && ln -s ${reads[0]} ${name}_1.fastq.gz - [ ! -f ${name}_2.fastq.gz ] && ln -s ${reads[1]} ${name}_2.fastq.gz - trim_galore --cores $cores --paired --fastqc --gzip $c_r1 $c_r2 $tpc_r1 $tpc_r2 $nextseq ${name}_1.fastq.gz ${name}_2.fastq.gz - """ - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- ALIGN -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 3.1: Map read(s) with bwa mem - */ -process BWA_MEM { - tag "$name" - label 'process_high' - - input: - tuple val(name), path(reads) from ch_trimmed_reads - path index from ch_bwa_index.collect() - - output: - tuple val(name), path('*.bam') into ch_bwa_bam - - script: - prefix = "${name}.Lb" - rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\'" - if (params.seq_center) { - rg = "\'@RG\\tID:${name}\\tSM:${name.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${name}\\tPU:1\\tCN:${params.seq_center}\'" - } - score = params.bwa_min_score ? 
"-T ${params.bwa_min_score}" : '' - """ - bwa mem \\ - -t $task.cpus \\ - -M \\ - -R $rg \\ - $score \\ - ${index}/${bwa_base} \\ - $reads \\ - | samtools view -@ $task.cpus -b -h -F 0x0100 -O BAM -o ${prefix}.bam - - """ -} +nextflow.enable.dsl = 2 /* - * STEP 3.2: Convert BAM to coordinate sorted BAM - */ -process SORT_BAM { - tag "$name" - label 'process_medium' - if (params.save_align_intermeds) { - publishDir path: "${params.outdir}/bwa/library", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.flagstat')) "samtools_stats/$filename" - else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" - else if (filename.endsWith('.stats')) "samtools_stats/$filename" - else filename - } - } - - input: - tuple val(name), path(bam) from ch_bwa_bam - - output: - tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_sort_bam_merge - path '*.{flagstat,idxstats,stats}' into ch_sort_bam_flagstat_mqc - - script: - prefix = "${name}.Lb" - """ - samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $name $bam - samtools index ${prefix}.sorted.bam - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats - samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats - """ -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- MERGE LIBRARY BAM -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 4.1: Merge BAM files for all libraries from same sample replicate - */ -ch_sort_bam_merge - .map { it -> [ it[0].split('_')[0..-2].join('_'), it[1] ] } - .groupTuple(by: [0]) - .map { it -> [ it[0], it[1].flatten() ] } - .set { ch_sort_bam_merge } - -process MERGED_BAM { - tag "$name" 
- label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.flagstat')) "samtools_stats/$filename" - else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" - else if (filename.endsWith('.stats')) "samtools_stats/$filename" - else if (filename.endsWith('.metrics.txt')) "picard_metrics/$filename" - else params.save_align_intermeds ? filename : null - } - - input: - tuple val(name), path(bams) from ch_sort_bam_merge - - output: - tuple val(name), path("*${prefix}.sorted.{bam,bam.bai}") into ch_merge_bam_filter, - ch_merge_bam_preseq - path '*.{flagstat,idxstats,stats}' into ch_merge_bam_stats_mqc - path '*.txt' into ch_merge_bam_metrics_mqc - - script: - prefix = "${name}.mLb.mkD" - bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - def avail_mem = 3 - if (!task.memory) { - log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.toGiga() - } - if (bam_files.size() > 1) { - """ - picard -Xmx${avail_mem}g MergeSamFiles \\ - ${'INPUT='+bam_files.join(' INPUT=')} \\ - OUTPUT=${name}.sorted.bam \\ - SORT_ORDER=coordinate \\ - VALIDATION_STRINGENCY=LENIENT \\ - TMP_DIR=tmp - samtools index ${name}.sorted.bam - - picard -Xmx${avail_mem}g MarkDuplicates \\ - INPUT=${name}.sorted.bam \\ - OUTPUT=${prefix}.sorted.bam \\ - ASSUME_SORTED=true \\ - REMOVE_DUPLICATES=false \\ - METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\ - VALIDATION_STRINGENCY=LENIENT \\ - TMP_DIR=tmp - - samtools index ${prefix}.sorted.bam - samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats - """ - } else { - """ - picard -Xmx${avail_mem}g MarkDuplicates \\ - INPUT=${bam_files[0]} \\ - OUTPUT=${prefix}.sorted.bam \\ - ASSUME_SORTED=true \\ - REMOVE_DUPLICATES=false \\ - METRICS_FILE=${prefix}.MarkDuplicates.metrics.txt \\ - VALIDATION_STRINGENCY=LENIENT \\ - TMP_DIR=tmp - - samtools index ${prefix}.sorted.bam - samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats - """ - } -} - -/* - * STEP 4.2: Filter BAM file at merged library-level - */ -process MERGED_BAM_FILTER { - tag "$name" - label 'process_medium' - publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, - saveAs: { filename -> - if (params.single_end || params.save_align_intermeds) { - if (filename.endsWith('.flagstat')) "samtools_stats/$filename" - else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" - else if (filename.endsWith('.stats')) "samtools_stats/$filename" - else if (filename.endsWith('.sorted.bam')) filename - else if (filename.endsWith('.sorted.bam.bai')) 
filename - else null - } - } - - input: - tuple val(name), path(bam) from ch_merge_bam_filter - path bed from ch_genome_filter_regions.collect() - path bamtools_filter_config from ch_bamtools_filter_config - - output: - tuple val(name), path('*.{bam,bam.bai}') into ch_filter_bam - tuple val(name), path('*.flagstat') into ch_filter_bam_flagstat - path '*.{idxstats,stats}' into ch_filter_bam_stats_mqc - - script: - prefix = params.single_end ? "${name}.mLb.clN" : "${name}.mLb.flT" - filter_params = params.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' - dup_params = params.keep_dups ? '' : '-F 0x0400' - multimap_params = params.keep_multi_map ? '' : '-q 1' - blacklist_params = params.blacklist ? "-L $bed" : '' - name_sort_bam = params.single_end ? '' : "samtools sort -n -@ $task.cpus -o ${prefix}.bam -T $prefix ${prefix}.sorted.bam" - """ - samtools view \\ - $filter_params \\ - $dup_params \\ - $multimap_params \\ - $blacklist_params \\ - -b ${bam[0]} \\ - | bamtools filter \\ - -out ${prefix}.sorted.bam \\ - -script $bamtools_filter_config - - samtools index ${prefix}.sorted.bam - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats - samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats - - $name_sort_bam - """ -} - -/* - * STEP 4.3: Remove orphan reads from paired-end BAM file - */ -if (params.single_end) { - ch_filter_bam - .into { ch_rm_orphan_bam_metrics; - ch_rm_orphan_bam_bigwig; - ch_rm_orphan_bam_macs_1; - ch_rm_orphan_bam_macs_2; - ch_rm_orphan_bam_phantompeakqualtools; - ch_rm_orphan_name_bam_counts } - - ch_filter_bam_flagstat - .into { ch_rm_orphan_flagstat_bigwig; - ch_rm_orphan_flagstat_macs; - ch_rm_orphan_flagstat_mqc } - - ch_filter_bam_stats_mqc - .set { ch_rm_orphan_stats_mqc } -} else { - process MERGED_BAM_REMOVE_ORPHAN { - tag "$name" - label 'process_medium' - publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: 
params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.flagstat')) "samtools_stats/$filename" - else if (filename.endsWith('.idxstats')) "samtools_stats/$filename" - else if (filename.endsWith('.stats')) "samtools_stats/$filename" - else if (filename.endsWith('.sorted.bam')) filename - else if (filename.endsWith('.sorted.bam.bai')) filename - else null - } - - input: - tuple val(name), path(bam) from ch_filter_bam - - output: - tuple val(name), path('*.sorted.{bam,bam.bai}') into ch_rm_orphan_bam_metrics, - ch_rm_orphan_bam_bigwig, - ch_rm_orphan_bam_macs_1, - ch_rm_orphan_bam_macs_2, - ch_rm_orphan_bam_phantompeakqualtools - tuple val(name), path("${prefix}.bam") into ch_rm_orphan_name_bam_counts - tuple val(name), path('*.flagstat') into ch_rm_orphan_flagstat_bigwig, - ch_rm_orphan_flagstat_macs, - ch_rm_orphan_flagstat_mqc - path '*.{idxstats,stats}' into ch_rm_orphan_stats_mqc - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - prefix = "${name}.mLb.clN" - """ - bampe_rm_orphan.py ${bam[0]} ${prefix}.bam --only_fr_pairs - - samtools sort -@ $task.cpus -o ${prefix}.sorted.bam -T $prefix ${prefix}.bam - samtools index ${prefix}.sorted.bam - samtools flagstat ${prefix}.sorted.bam > ${prefix}.sorted.bam.flagstat - samtools idxstats ${prefix}.sorted.bam > ${prefix}.sorted.bam.idxstats - samtools stats ${prefix}.sorted.bam > ${prefix}.sorted.bam.stats - """ - } -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- MERGE LIBRARY BAM POST-ANALYSIS -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 5.1: Preseq analysis after merging libraries and before filtering - */ -process PRESEQ { - tag "$name" - label 'process_medium' - label 'error_ignore' 
- publishDir "${params.outdir}/bwa/mergedLibrary/preseq", mode: params.publish_dir_mode - - when: - !params.skip_preseq - - input: - tuple val(name), path(bam) from ch_merge_bam_preseq - - output: - path '*.ccurve.txt' into ch_preseq_mqc - path '*.log' - - script: - pe = params.single_end ? '' : '-pe' - """ - preseq lc_extrap \\ - -output ${name}.ccurve.txt \\ - -verbose \\ - -bam \\ - $pe \\ - -seed 1 \\ - ${bam[0]} - cp .command.err ${name}.command.log - """ -} - -/* - * STEP 5.2: Picard CollectMultipleMetrics after merging libraries and filtering - */ -process PICARD_METRICS { - tag "$name" - label 'process_medium' - publishDir path: "${params.outdir}/bwa/mergedLibrary", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('_metrics')) "picard_metrics/$filename" - else if (filename.endsWith('.pdf')) "picard_metrics/pdf/$filename" - else null - } - - when: - !params.skip_picard_metrics - - input: - tuple val(name), path(bam) from ch_rm_orphan_bam_metrics - path fasta from ch_fasta - - output: - path '*_metrics' into ch_collectmetrics_mqc - path '*.pdf' - - script: - prefix = "${name}.mLb.clN" - def avail_mem = 3 - if (!task.memory) { - log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
- } else { - avail_mem = task.memory.toGiga() - } - """ - picard -Xmx${avail_mem}g CollectMultipleMetrics \\ - INPUT=${bam[0]} \\ - OUTPUT=${prefix}.CollectMultipleMetrics \\ - REFERENCE_SEQUENCE=$fasta \\ - VALIDATION_STRINGENCY=LENIENT \\ - TMP_DIR=tmp - """ -} - -/* - * STEP 5.3: Read depth normalised bigWig - */ -process BIGWIG { - tag "$name" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/bigwig", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('scale_factor.txt')) "scale/$filename" - else if (filename.endsWith('.bigWig')) filename - else null - } - - input: - tuple val(name), path(bam), path(flagstat) from ch_rm_orphan_bam_bigwig.join(ch_rm_orphan_flagstat_bigwig, by: [0]) - path sizes from ch_genome_sizes_bigwig.collect() - - output: - tuple val(name), path('*.bigWig') into ch_bigwig_plotprofile - path '*igv.txt' into ch_bigwig_igv - path '*scale_factor.txt' - - script: - pe_fragment = params.single_end ? '' : '-pc' - extend = (params.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' - """ - SCALE_FACTOR=\$(grep 'mapped (' $flagstat | awk '{print 1000000/\$1}') - echo \$SCALE_FACTOR > ${name}.scale_factor.txt - genomeCoverageBed -ibam ${bam[0]} -bg -scale \$SCALE_FACTOR $pe_fragment $extend | sort -T '.' 
-k1,1 -k2,2n > ${name}.bedGraph - - bedGraphToBigWig ${name}.bedGraph $sizes ${name}.bigWig - - find * -type f -name "*.bigWig" -exec echo -e "bwa/mergedLibrary/bigwig/"{}"\\t0,0,178" \\; > ${name}.bigWig.igv.txt - """ -} - -/* - * STEP 5.4: Generate gene body coverage plot with deepTools plotProfile and plotHeatmap - */ -process PLOTPROFILE { - tag "$name" - label 'process_high' - publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotProfile", mode: params.publish_dir_mode - - when: - !params.skip_plot_profile - - input: - tuple val(name), path(bigwig) from ch_bigwig_plotprofile - path bed from ch_gene_bed - - output: - path '*.plotProfile.tab' into ch_plotprofile_mqc - path '*.{gz,pdf,mat.tab}' - - script: - """ - computeMatrix scale-regions \\ - --regionsFileName $bed \\ - --scoreFileName $bigwig \\ - --outFileName ${name}.computeMatrix.mat.gz \\ - --outFileNameMatrix ${name}.computeMatrix.vals.mat.tab \\ - --regionBodyLength 1000 \\ - --beforeRegionStartLength 3000 \\ - --afterRegionStartLength 3000 \\ - --skipZeros \\ - --smartLabels \\ - --numberOfProcessors $task.cpus - - plotProfile --matrixFile ${name}.computeMatrix.mat.gz \\ - --outFileName ${name}.plotProfile.pdf \\ - --outFileNameData ${name}.plotProfile.tab - - plotHeatmap --matrixFile ${name}.computeMatrix.mat.gz \\ - --outFileName ${name}.plotHeatmap.pdf \\ - --outFileNameMatrix ${name}.plotHeatmap.mat.tab - """ -} - -/* - * STEP 5.5: Phantompeakqualtools - */ -process PHANTOMPEAKQUALTOOLS { - tag "$name" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/phantompeakqualtools", mode: params.publish_dir_mode - - when: - !params.skip_spp - - input: - tuple val(name), path(bam) from ch_rm_orphan_bam_phantompeakqualtools - path spp_correlation_header from ch_spp_correlation_header - path spp_nsc_header from ch_spp_nsc_header - path spp_rsc_header from ch_spp_rsc_header - - output: - path '*.spp.out' into ch_spp_out_mqc - path '*_mqc.tsv' into ch_spp_csv_mqc - path '*.pdf' - 
- script: - """ - RUN_SPP=`which run_spp.R` - Rscript -e "library(caTools); source(\\"\$RUN_SPP\\")" -c="${bam[0]}" -savp="${name}.spp.pdf" -savd="${name}.spp.Rdata" -out="${name}.spp.out" -p=$task.cpus - cp $spp_correlation_header ${name}_spp_correlation_mqc.tsv - Rscript -e "load('${name}.spp.Rdata'); write.table(crosscorr\\\$cross.correlation, file=\\"${name}_spp_correlation_mqc.tsv\\", sep=",", quote=FALSE, row.names=FALSE, col.names=FALSE,append=TRUE)" - - awk -v OFS='\t' '{print "${name}", \$9}' ${name}.spp.out | cat $spp_nsc_header - > ${name}_spp_nsc_mqc.tsv - awk -v OFS='\t' '{print "${name}", \$10}' ${name}.spp.out | cat $spp_rsc_header - > ${name}_spp_rsc_mqc.tsv - """ -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- MERGE LIBRARY PEAK ANALYSIS -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -// Create channel linking IP bams with control bams -ch_rm_orphan_bam_macs_1 - .combine(ch_rm_orphan_bam_macs_2) - .set { ch_rm_orphan_bam_macs_1 } - -ch_design_controls_csv - .combine(ch_rm_orphan_bam_macs_1) - .filter { it[0] == it[5] && it[1] == it[7] } - .join(ch_rm_orphan_flagstat_macs) - .map { it -> it[2..-1] } - .into { ch_group_bam_macs; - ch_group_bam_plotfingerprint; - ch_group_bam_counts } - -/* - * STEP 6.1: deepTools plotFingerprint - */ -process PLOTFINGERPRINT { - tag "${ip} vs ${control}" - label 'process_high' - publishDir "${params.outdir}/bwa/mergedLibrary/deepTools/plotFingerprint", mode: params.publish_dir_mode - - when: - !params.skip_plot_fingerprint - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), path(ipbam), val(control), path(controlbam), path(ipflagstat) from ch_group_bam_plotfingerprint - - output: - path '*.raw.txt' into 
ch_plotfingerprint_mqc - path '*.{txt,pdf}' - - script: - extend = (params.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' - """ - plotFingerprint \\ - --bamfiles ${ipbam[0]} ${controlbam[0]} \\ - --plotFile ${ip}.plotFingerprint.pdf \\ - $extend \\ - --labels $ip $control \\ - --outRawCounts ${ip}.plotFingerprint.raw.txt \\ - --outQualityMetrics ${ip}.plotFingerprint.qcmetrics.txt \\ - --skipZeros \\ - --JSDsample ${controlbam[0]} \\ - --numberOfProcessors $task.cpus \\ - --numberOfSamples $params.fingerprint_bins - """ -} - -/* - * STEP 6.2: Call peaks with MACS2 and calculate FRiP score - */ -process MACS2 { - tag "${ip} vs ${control}" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.tsv')) "qc/$filename" - else if (filename.endsWith('.igv.txt')) null - else filename - } - - when: - params.macs_gsize - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), path(ipbam), val(control), path(controlbam), path(ipflagstat) from ch_group_bam_macs - path peak_count_header from ch_peak_count_header - path frip_score_header from ch_frip_score_header - - output: - tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), path("*.$PEAK_TYPE") into ch_macs_homer, - ch_macs_qc, - ch_macs_consensus - path '*igv.txt' into ch_macs_igv - path '*_mqc.tsv' into ch_macs_mqc - path '*.{bed,xls,gappedPeak,bdg}' - - script: - broad = params.narrow_peak ? '' : "--broad --broad-cutoff ${params.broad_cutoff}" - format = params.single_end ? 'BAM' : 'BAMPE' - pileup = params.save_macs_pileup ? '-B --SPMR' : '' - fdr = params.macs_fdr ? "--qvalue ${params.macs_fdr}" : '' - pvalue = params.macs_pvalue ? 
"--pvalue ${params.macs_pvalue}" : '' - """ - macs2 callpeak \\ - -t ${ipbam[0]} \\ - -c ${controlbam[0]} \\ - $broad \\ - -f $format \\ - -g $params.macs_gsize \\ - -n $ip \\ - $pileup \\ - $fdr \\ - $pvalue \\ - --keep-dup all - - cat ${ip}_peaks.${PEAK_TYPE} | wc -l | awk -v OFS='\t' '{ print "${ip}", \$1 }' | cat $peak_count_header - > ${ip}_peaks.count_mqc.tsv - - READS_IN_PEAKS=\$(intersectBed -a ${ipbam[0]} -b ${ip}_peaks.${PEAK_TYPE} -bed -c -f 0.20 | awk -F '\t' '{sum += \$NF} END {print sum}') - grep 'mapped (' $ipflagstat | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${ip}", a/\$1}' | cat $frip_score_header - > ${ip}_peaks.FRiP_mqc.tsv - - find * -type f -name "*.${PEAK_TYPE}" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/"{}"\\t0,0,178" \\; > ${ip}_peaks.igv.txt - """ -} - -/* - * STEP 6.3: Annotate peaks with HOMER - */ -process MACS2_ANNOTATE { - tag "${ip} vs ${control}" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}", mode: params.publish_dir_mode - - when: - params.macs_gsize && !params.skip_peak_annotation - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), val(ip), val(control), path(peak) from ch_macs_homer - path fasta from ch_fasta - path gtf from ch_gtf - - output: - path '*.txt' into ch_macs_annotate - - script: - """ - annotatePeaks.pl \\ - $peak \\ - $fasta \\ - -gid \\ - -gtf $gtf \\ - -cpu $task.cpus \\ - > ${ip}_peaks.annotatePeaks.txt - """ -} - -/* - * STEP 6.4: Aggregated QC plots for peaks, FRiP and peak-to-gene annotation - */ -process MACS2_QC { - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/qc", mode: params.publish_dir_mode - - when: - params.macs_gsize && !params.skip_peak_annotation && !params.skip_peak_qc - - input: - path peaks from ch_macs_qc.collect{ it[-1] } - path annos from ch_macs_annotate.collect() - path peak_annotation_header from ch_peak_annotation_header - - output: - path '*.tsv' into 
ch_macs_qc_mqc - path '*.{txt,pdf}' - - script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ - """ - plot_macs_qc.r \\ - -i ${peaks.join(',')} \\ - -s ${peaks.join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ - -o ./ \\ - -p macs_peak - - plot_homer_annotatepeaks.r \\ - -i ${annos.join(',')} \\ - -s ${annos.join(',').replaceAll("_peaks.annotatePeaks.txt","")} \\ - -o ./ \\ - -p macs_annotatePeaks - - cat $peak_annotation_header macs_annotatePeaks.summary.txt > macs_annotatePeaks.summary_mqc.tsv - """ -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- CONSENSUS PEAKS ANALYSIS -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -// Group by ip from this point and carry forward boolean variables -ch_macs_consensus - .map { it -> [ it[0], it[1], it[2], it[-1] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].sort() ] } - .set { ch_macs_consensus } - -/* - * STEP 7.1: Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts and UpSetR plot for intersection - */ -process CONSENSUS_PEAKS { - tag "${antibody}" - label 'process_long' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.igv.txt')) null - else filename - } - - when: - params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path(peaks) from ch_macs_consensus - - output: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path('*.bed') into ch_macs_consensus_bed - tuple val(antibody), path('*.saf') into ch_macs_consensus_saf - path 
'*.boolean.txt' into ch_macs_consensus_bool - path '*igv.txt' into ch_macs_consensus_igv - path '*.intersect.{txt,plot.pdf}' - - script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ - prefix = "${antibody}.consensus_peaks" - mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') - collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') - expandparam = params.narrow_peak ? '--is_narrow_peak' : '' - """ - sort -T '.' -k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ - | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt - - macs2_merged_expand.py ${prefix}.txt \\ - ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${PEAK_TYPE}","")} \\ - ${prefix}.boolean.txt \\ - --min_replicates $params.min_reps_consensus \\ - $expandparam - - awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed - - echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf - awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3, "+" }' ${prefix}.boolean.txt >> ${prefix}.saf - - plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf - - find * -type f -name "${prefix}.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/"{}"\\t0,0,0" \\; > ${prefix}.bed.igv.txt - """ -} - -/* - * STEP 7.2: Annotate consensus peaks with HOMER, and add annotation to boolean output file - */ -process CONSENSUS_PEAKS_ANNOTATE { - tag "${antibody}" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode - - when: - params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks && !params.skip_peak_annotation - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path(bed) from ch_macs_consensus_bed - path bool from ch_macs_consensus_bool - path 
fasta from ch_fasta - path gtf from ch_gtf - - output: - path '*.annotatePeaks.txt' - - script: - prefix = "${antibody}.consensus_peaks" - """ - annotatePeaks.pl \\ - $bed \\ - $fasta \\ - -gid \\ - -gtf $gtf \\ - -cpu $task.cpus \\ - > ${prefix}.annotatePeaks.txt - - cut -f2- ${prefix}.annotatePeaks.txt | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt - paste $bool tmp.txt > ${prefix}.boolean.annotatePeaks.txt - """ -} - -// Get BAM and SAF files for each ip -ch_group_bam_counts - .map { it -> [ it[3], [ it[0], it[1], it[2] ] ] } - .join(ch_rm_orphan_name_bam_counts) - .map { it -> [ it[1][0], it[1][1], it[1][2], it[2] ] } - .groupTuple() - .map { it -> [ it[0], it[1][0], it[2][0], it[3].flatten().sort() ] } - .join(ch_macs_consensus_saf) - .set { ch_group_bam_counts } - -/* - * STEP 7.3: Count reads in consensus peaks with featureCounts - */ -process CONSENSUS_PEAKS_COUNTS { - tag "${antibody}" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}", mode: params.publish_dir_mode - - when: - params.macs_gsize && (replicatesExist || multipleGroups) && !params.skip_consensus_peaks - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path(bams), path(saf) from ch_group_bam_counts - - output: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path('*featureCounts.txt') into ch_macs_consensus_counts - path '*featureCounts.txt.summary' into ch_macs_consensus_counts_mqc - - script: - prefix = "${antibody}.consensus_peaks" - bam_files = bams.findAll { it.toString().endsWith('.bam') }.sort() - pe_params = params.single_end ? 
'' : '-p --donotsort' - """ - featureCounts \\ - -F SAF \\ - -O \\ - --fracOverlap 0.2 \\ - -T $task.cpus \\ - $pe_params \\ - -a $saf \\ - -o ${prefix}.featureCounts.txt \\ - ${bam_files.join(' ')} - """ -} - -/* - * STEP 7.4: Differential analysis with DESeq2 - */ -process CONSENSUS_PEAKS_DESEQ2 { - tag "${antibody}" - label 'process_medium' - publishDir "${params.outdir}/bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.endsWith('.igv.txt')) null - else filename - } - - when: - params.macs_gsize && replicatesExist && multipleGroups && !params.skip_consensus_peaks && !params.skip_diff_analysis - - input: - tuple val(antibody), val(replicatesExist), val(multipleGroups), path(counts) from ch_macs_consensus_counts - path deseq2_pca_header from ch_deseq2_pca_header - path deseq2_clustering_header from ch_deseq2_clustering_header - - output: - path '*.tsv' into ch_macs_consensus_deseq_mqc - path '*igv.txt' into ch_macs_consensus_deseq_comp_igv - path '*.{RData,results.txt,pdf,log}' - path 'sizeFactors' - path '*vs*/*.{pdf,txt}' - path '*vs*/*.bed' - - script: - prefix = "${antibody}.consensus_peaks" - bam_ext = params.single_end ? '.mLb.clN.sorted.bam' : '.mLb.clN.bam' - vst = params.deseq2_vst ? 
'--vst TRUE' : '' - """ - featurecounts_deseq2.r \\ - --featurecount_file $counts \\ - --bam_suffix '$bam_ext' \\ - --outdir ./ \\ - --outprefix $prefix \\ - --outsuffix '' \\ - --cores $task.cpus \\ - $vst - - sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt - sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt - cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv - - sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt - sed -i -e 's/DESeq2 /${antibody} DESeq2 /g' tmp.txt - cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv - - find * -type f -name "*.FDR0.05.results.bed" -exec echo -e "bwa/mergedLibrary/macs/${PEAK_TYPE}/consensus/${antibody}/deseq2/"{}"\\t255,0,0" \\; > ${prefix}.igv.txt - """ -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- IGV -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * STEP 8: Create IGV session file - */ -process IGV { - publishDir "${params.outdir}/igv/${PEAK_TYPE}", mode: params.publish_dir_mode - - when: - !params.skip_igv - - input: - path fasta from ch_fasta - path bigwigs from ch_bigwig_igv.collect().ifEmpty([]) - path peaks from ch_macs_igv.collect().ifEmpty([]) - path consensus_peaks from ch_macs_consensus_igv.collect().ifEmpty([]) - path differential_peaks from ch_macs_consensus_deseq_comp_igv.collect().ifEmpty([]) - - output: - path '*.{txt,xml}' - - script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ - """ - cat *.txt > igv_files.txt - igv_files_to_session.py igv_session.xml igv_files.txt ../../genome/${fasta.getName()} --path_prefix '../../' - """ -} - 
-/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- MULTIQC -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -/* - * Parse software version numbers - */ -process get_software_versions { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode, - saveAs: { filename -> - if (filename.indexOf('.csv') > 0) filename - else null - } - - output: - path 'software_versions_mqc.yaml' into ch_software_versions_mqc - path 'software_versions.csv' +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + GENOME PARAMETER VALUES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - script: - """ - echo $workflow.manifest.version > v_pipeline.txt - echo $workflow.nextflow.version > v_nextflow.txt - fastqc --version > v_fastqc.txt - trim_galore --version > v_trim_galore.txt - echo \$(bwa 2>&1) > v_bwa.txt - samtools --version > v_samtools.txt - bedtools --version > v_bedtools.txt - echo \$(bamtools --version 2>&1) > v_bamtools.txt - echo \$(plotFingerprint --version 2>&1) > v_deeptools.txt || true - picard MarkDuplicates --version &> v_picard.txt || true - echo \$(R --version 2>&1) > v_R.txt - python -c "import pysam; print(pysam.__version__)" > v_pysam.txt - echo \$(macs2 --version 2>&1) > v_macs2.txt - touch v_homer.txt - echo \$(featureCounts -v 2>&1) > v_featurecounts.txt - preseq &> v_preseq.txt - multiqc --version > v_multiqc.txt - scrape_software_versions.py &> software_versions_mqc.yaml - """ -} +params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') +params.bwa_index = WorkflowMain.getGenomeAttribute(params, 'bwa') +params.bowtie2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2') +params.chromap_index 
= WorkflowMain.getGenomeAttribute(params, 'chromap') +params.star_index = WorkflowMain.getGenomeAttribute(params, 'star') +params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +params.gff = WorkflowMain.getGenomeAttribute(params, 'gff') +params.gene_bed = WorkflowMain.getGenomeAttribute(params, 'gene_bed') +params.blacklist = WorkflowMain.getGenomeAttribute(params, 'blacklist') +params.macs_gsize = WorkflowMain.getMacsGsize(params) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE & PRINT PARAMETER SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ -Channel.from(summary.collect{ [it.key, it.value] }) - .map { k,v -> "
$k
${v ?: 'N/A'}
" } - .reduce { a, b -> return [a, b].join("\n ") } - .map { x -> """ - id: 'nf-core-chipseq-summary' - description: " - this information is collected when the pipeline is started." - section_name: 'nf-core/chipseq Workflow Summary' - section_href: 'https://github.com/nf-core/chipseq' - plot_type: 'html' - data: | -
- $x -
- """.stripIndent() } - .set { ch_workflow_summary } +WorkflowMain.initialise(workflow, params, log) /* - * STEP 9: MultiQC - */ -process MULTIQC { - publishDir "${params.outdir}/multiqc/${PEAK_TYPE}", mode: params.publish_dir_mode - - when: - !params.skip_multiqc - - input: - path (multiqc_config) from ch_multiqc_config - path (mqc_custom_config) from ch_multiqc_custom_config.collect().ifEmpty([]) - - path ('software_versions/*') from ch_software_versions_mqc.collect() - path workflow_summary from ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') - - path ('fastqc/*') from ch_fastqc_reports_mqc.collect().ifEmpty([]) - path ('trimgalore/*') from ch_trimgalore_results_mqc.collect().ifEmpty([]) - path ('trimgalore/fastqc/*') from ch_trimgalore_fastqc_reports_mqc.collect().ifEmpty([]) - - path ('alignment/library/*') from ch_sort_bam_flagstat_mqc.collect() - path ('alignment/mergedLibrary/*') from ch_merge_bam_stats_mqc.collect() - path ('alignment/mergedLibrary/*') from ch_rm_orphan_flagstat_mqc.collect{it[1]} - path ('alignment/mergedLibrary/*') from ch_rm_orphan_stats_mqc.collect() - path ('alignment/mergedLibrary/picard_metrics/*') from ch_merge_bam_metrics_mqc.collect() - path ('alignment/mergedLibrary/picard_metrics/*') from ch_collectmetrics_mqc.collect() - - path ('macs/*') from ch_macs_mqc.collect().ifEmpty([]) - path ('macs/*') from ch_macs_qc_mqc.collect().ifEmpty([]) - path ('macs/consensus/*') from ch_macs_consensus_counts_mqc.collect().ifEmpty([]) - path ('macs/consensus/*') from ch_macs_consensus_deseq_mqc.collect().ifEmpty([]) - - path ('preseq/*') from ch_preseq_mqc.collect().ifEmpty([]) - path ('deeptools/*') from ch_plotfingerprint_mqc.collect().ifEmpty([]) - path ('deeptools/*') from ch_plotprofile_mqc.collect().ifEmpty([]) - path ('phantompeakqualtools/*') from ch_spp_out_mqc.collect().ifEmpty([]) - path ('phantompeakqualtools/*') from ch_spp_csv_mqc.collect().ifEmpty([]) 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + NAMED WORKFLOW FOR PIPELINE +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - output: - path '*multiqc_report.html' into ch_multiqc_report - path '*_data' +include { CHIPSEQ } from './workflows/chipseq' - script: - rtitle = custom_runName ? "--title \"$custom_runName\"" : '' - rfilename = custom_runName ? "--filename " + custom_runName.replaceAll('\\W','_').replaceAll('_+','_') + "_multiqc_report" : '' - custom_config_file = params.multiqc_config ? "--config $mqc_custom_config" : '' - """ - multiqc . -f $rtitle $rfilename $custom_config_file - """ +// +// WORKFLOW: Run main nf-core/chipseq analysis pipeline +// +workflow NFCORE_CHIPSEQ { + CHIPSEQ () } -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- REPORTS/DOCUMENTATION -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - /* - * STEP 10: Output description HTML - */ -process output_documentation { - publishDir "${params.outdir}/pipeline_info", mode: params.publish_dir_mode - - input: - path output_docs from ch_output_docs - path images from ch_output_docs_images - - output: - path 'results_description.html' +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN ALL WORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ - script: - """ - markdown_to_html.py $output_docs -o results_description.html - """ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// +workflow { + NFCORE_CHIPSEQ () } /* - * Completion e-mail notification - */ -workflow.onComplete { - - // 
Set up the e-mail variables - def subject = "[nf-core/chipseq] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[nf-core/chipseq] FAILED: $workflow.runName" - } - def email_fields = [:] - email_fields['version'] = workflow.manifest.version - email_fields['runName'] = custom_runName ?: workflow.runName - email_fields['success'] = workflow.success - email_fields['dateComplete'] = workflow.complete - email_fields['duration'] = workflow.duration - email_fields['exitStatus'] = workflow.exitStatus - email_fields['errorMessage'] = (workflow.errorMessage ?: 'None') - email_fields['errorReport'] = (workflow.errorReport ?: 'None') - email_fields['commandLine'] = workflow.commandLine - email_fields['projectDir'] = workflow.projectDir - email_fields['summary'] = summary - email_fields['summary']['Date Started'] = workflow.start - email_fields['summary']['Date Completed'] = workflow.complete - email_fields['summary']['Pipeline script file path'] = workflow.scriptFile - email_fields['summary']['Pipeline script hash ID'] = workflow.scriptId - if (workflow.repository) email_fields['summary']['Pipeline repository Git URL'] = workflow.repository - if (workflow.commitId) email_fields['summary']['Pipeline repository Git Commit'] = workflow.commitId - if (workflow.revision) email_fields['summary']['Pipeline Git branch/tag'] = workflow.revision - email_fields['summary']['Nextflow Version'] = workflow.nextflow.version - email_fields['summary']['Nextflow Build'] = workflow.nextflow.build - email_fields['summary']['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp - - // On success try attach the multiqc report - def mqc_report = null - try { - if (workflow.success) { - mqc_report = ch_multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList) { - log.warn "[nf-core/chipseq] Found multiple reports from process 'multiqc', will use only one" - mqc_report = mqc_report[0] - } - } - } catch (all) { - log.warn "[nf-core/chipseq] Could not attach MultiQC 
report to summary email" - } - - // Check if we are only sending emails on failure - email_address = params.email - if (!params.email && params.email_on_fail && !workflow.success) { - email_address = params.email_on_fail - } - - // Render the TXT template - def engine = new groovy.text.GStringTemplateEngine() - def tf = new File("$baseDir/assets/email_template.txt") - def txt_template = engine.createTemplate(tf).make(email_fields) - def email_txt = txt_template.toString() - - // Render the HTML template - def hf = new File("$baseDir/assets/email_template.html") - def html_template = engine.createTemplate(hf).make(email_fields) - def email_html = html_template.toString() - - // Render the sendmail template - def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, baseDir: "$baseDir", mqcFile: mqc_report, mqcMaxSize: params.max_multiqc_email_size.toBytes() ] - def sf = new File("$baseDir/assets/sendmail_template.txt") - def sendmail_template = engine.createTemplate(sf).make(smail_fields) - def sendmail_html = sendmail_template.toString() - - // Send the HTML e-mail - if (email_address) { - try { - if (params.plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } - // Try to send HTML e-mail using sendmail - [ 'sendmail', '-t' ].execute() << sendmail_html - log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (sendmail)" - } catch (all) { - // Catch failures and try with plaintext - [ 'mail', '-s', subject, email_address ].execute() << email_txt - log.info "[nf-core/chipseq] Sent summary e-mail to $email_address (mail)" - } - } - - // Write summary e-mail HTML to a file - def output_d = new File("${params.outdir}/pipeline_info/") - if (!output_d.exists()) { - output_d.mkdirs() - } - def output_hf = new File(output_d, "pipeline_report.html") - output_hf.withWriter { w -> w << email_html } - def output_tf = new File(output_d, "pipeline_report.txt") - output_tf.withWriter { w -> w << 
email_txt } - - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_red = params.monochrome_logs ? '' : "\033[0;31m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - - if (workflow.stats.ignoredCount > 0 && workflow.success) { - log.info "-${c_purple}Warning, pipeline completed, but with errored process(es) ${c_reset}-" - log.info "-${c_red}Number of ignored errored process(es) : ${workflow.stats.ignoredCount} ${c_reset}-" - log.info "-${c_green}Number of successfully ran process(es) : ${workflow.stats.succeedCount} ${c_reset}-" - } - - if (workflow.success) { - log.info "-${c_purple}[nf-core/chipseq]${c_green} Pipeline completed successfully${c_reset}-" - } else { - checkHostname() - log.info "-${c_purple}[nf-core/chipseq]${c_red} Pipeline completed with errors${c_reset}-" - } - -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- NF-CORE HEADER -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// - -def nfcoreHeader() { - // Log colors ANSI codes - c_black = params.monochrome_logs ? '' : "\033[0;30m"; - c_blue = params.monochrome_logs ? '' : "\033[0;34m"; - c_cyan = params.monochrome_logs ? '' : "\033[0;36m"; - c_dim = params.monochrome_logs ? '' : "\033[2m"; - c_green = params.monochrome_logs ? '' : "\033[0;32m"; - c_purple = params.monochrome_logs ? '' : "\033[0;35m"; - c_reset = params.monochrome_logs ? '' : "\033[0m"; - c_white = params.monochrome_logs ? '' : "\033[0;37m"; - c_yellow = params.monochrome_logs ? 
'' : "\033[0;33m"; - - return """ -${c_dim}--------------------------------------------------${c_reset}- - ${c_green},--.${c_black}/${c_green},-.${c_reset} - ${c_blue} ___ __ __ __ ___ ${c_green}/,-._.--~\'${c_reset} - ${c_blue} |\\ | |__ __ / ` / \\ |__) |__ ${c_yellow}} {${c_reset} - ${c_blue} | \\| | \\__, \\__/ | \\ |___ ${c_green}\\`-._,-`-,${c_reset} - ${c_green}`._,._,\'${c_reset} - ${c_purple} nf-core/chipseq v${workflow.manifest.version}${c_reset} - -${c_dim}--------------------------------------------------${c_reset}- - """.stripIndent() -} - -def checkHostname() { - def c_reset = params.monochrome_logs ? '' : "\033[0m" - def c_white = params.monochrome_logs ? '' : "\033[0;37m" - def c_red = params.monochrome_logs ? '' : "\033[1;91m" - def c_yellow_bold = params.monochrome_logs ? '' : "\033[1;93m" - if (params.hostnames) { - def hostname = "hostname".execute().text.trim() - params.hostnames.each { prof, hnames -> - hnames.each { hname -> - if (hostname.contains(hname) && !workflow.profile.contains(prof)) { - log.error "====================================================\n" + - " ${c_red}WARNING!${c_reset} You are running with `-profile $workflow.profile`\n" + - " but your machine hostname is ${c_white}'$hostname'${c_reset}\n" + - " ${c_yellow_bold}It's highly recommended that you use `-profile $prof${c_reset}`\n" + - "============================================================" - } - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// -/* -- -- */ -/* -- END OF PIPELINE -- */ -/* -- -- */ -/////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////// +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/modules.json b/modules.json new file mode 100644 index 000000000..81c94b44a --- /dev/null +++ b/modules.json @@ -0,0 +1,139 @@ +{ + "name": "nf-core/chipseq", + "homePage": "https://github.com/nf-core/chipseq", + "repos": { + "nf-core/modules": { + "git_url": "https://github.com/nf-core/modules.git", + "modules": { + "bowtie2/align": { + "branch": "master", + "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c" + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bwa/index": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "bwa/mem": { + "branch": "master", + "git_sha": "4f5274c3de0c9521f5033893ff61057a74c45ba9" + }, + "chromap/chromap": { + "branch": "master", + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "chromap/index": { + "branch": "master", + "git_sha": "dbb46c9b635080b132bab4b8d5b9a14f0d1c22e7" + }, + "custom/dumpsoftwareversions": { + "branch": "master", + "git_sha": "e5b44499efcf6f7fb24874886bac60591c5d94dd" + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08" + }, + "deeptools/computematrix": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotfingerprint": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotheatmap": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "deeptools/plotprofile": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "fastqc": { + "branch": "master", + "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe" + }, + "gffread": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "gunzip": { + "branch": "master", + "git_sha": 
"fa37e0662690c4ec4260dae282fbce08777503e6" + }, + "homer/annotatepeaks": { + "branch": "master", + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "khmer/uniquekmers": { + "branch": "master", + "git_sha": "82fdff4fb4ce6cafcc028a7503da835427f35352" + }, + "macs2/callpeak": { + "branch": "master", + "git_sha": "f0800157544a82ae222931764483331a81812012" + }, + "phantompeakqualtools": { + "branch": "master", + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "picard/mergesamfiles": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c" + }, + "preseq/lcextrap": { + "branch": "master", + "git_sha": "7111e571cc5b6069de4673cd6165af680f17b4d7" + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f" + }, + "samtools/index": { + "branch": "master", + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "samtools/sort": { + "branch": "master", + "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773" + }, + "samtools/stats": { + "branch": "master", + "git_sha": "f48a24770e24358e58de66e9b805a70d77cd154b" + }, + "subread/featurecounts": { + "branch": "master", + "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d" + }, + "trimgalore": { + "branch": "master", + "git_sha": "85ec13ff1fc2196c5a507ea497de468101baabed" + }, + "ucsc/bedgraphtobigwig": { + "branch": "master", + "git_sha": "233fa70811a03a4cecb2ece483b5c8396e2cee1d" + }, + "untar": { + "branch": "master", + "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5" + } + } + } + } +} diff --git a/modules/local/annotate_boolean_peaks.nf b/modules/local/annotate_boolean_peaks.nf new file 
mode 100644 index 000000000..ab2dfbed3 --- /dev/null +++ b/modules/local/annotate_boolean_peaks.nf @@ -0,0 +1,28 @@ +process ANNOTATE_BOOLEAN_PEAKS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(boolean_txt), path(homer_peaks) + + output: + path '*.boolean.annotatePeaks.txt', emit: annotate_peaks_txt + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cut -f2- ${homer_peaks} | awk 'NR==1; NR > 1 {print \$0 | "sort -T '.' -k1,1 -k2,2n"}' | cut -f6- > tmp.txt + paste $boolean_txt tmp.txt > ${prefix}.boolean.annotatePeaks.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/bam_filter.nf b/modules/local/bam_filter.nf new file mode 100644 index 000000000..ff07c084b --- /dev/null +++ b/modules/local/bam_filter.nf @@ -0,0 +1,47 @@ +/* + * Filter BAM file + */ +process BAM_FILTER { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bamtools=2.5.2 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' : + 'quay.io/biocontainers/mulled-v2-0560a8046fc82aa4338588eca29ff18edab2c5aa:5687a7da26983502d0a8a9a6b05ed727c740ddc4-0' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + path bamtools_filter_se_config + path bamtools_filter_pe_config + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def filter_params = meta.single_end ? '-F 0x004' : '-F 0x004 -F 0x0008 -f 0x001' + def dup_params = params.keep_dups ? '' : '-F 0x0400' + def multimap_params = params.keep_multi_map ? '' : '-q 1' + def blacklist_params = params.blacklist ? "-L $bed" : '' + def config = meta.single_end ? bamtools_filter_se_config : bamtools_filter_pe_config + """ + samtools view \\ + $filter_params \\ + $dup_params \\ + $multimap_params \\ + $blacklist_params \\ + -b $bam \\ + | bamtools filter \\ + -out ${prefix}.bam \\ + -script $config + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + bamtools: \$(echo \$(bamtools --version 2>&1) | sed 's/^.*bamtools //; s/Part .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/bam_remove_orphans.nf b/modules/local/bam_remove_orphans.nf new file mode 100644 index 000000000..21ffc73a9 --- /dev/null +++ b/modules/local/bam_remove_orphans.nf @@ -0,0 +1,43 @@ +/* + * Remove orphan reads from paired-end BAM file + */ +process BAM_REMOVE_ORPHANS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::pysam=0.19.0 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' : + 'quay.io/biocontainers/mulled-v2-57736af1eb98c01010848572c9fec9fff6ffaafd:402e865b8f6af2f3e58c6fc8d57127ff0144b2c7-0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam + path "versions.yml" , emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + if (!meta.single_end) { + """ + samtools sort -n -@ $task.cpus -o ${prefix}.name.sorted.bam -T ${prefix}.name.sorted $bam + bampe_rm_orphan.py ${prefix}.name.sorted.bam ${prefix}.bam $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + ln -s $bam ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/local/bedtools_genomecov.nf b/modules/local/bedtools_genomecov.nf new file mode 100644 index 000000000..e8cbb3f74 --- /dev/null +++ b/modules/local/bedtools_genomecov.nf @@ -0,0 +1,41 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + tuple val(meta), path(bam), path(flagstat) + + output: + tuple val(meta), path("*.bedGraph"), emit: bedgraph + tuple val(meta), path("*.txt") , emit: scale_factor + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + + def pe = meta.single_end ? '' : '-pc' + def extend = (meta.single_end && params.fragment_size > 0) ? "-fs ${params.fragment_size}" : '' + """ + SCALE_FACTOR=\$(grep '[0-9] mapped (' $flagstat | awk '{print 1000000/\$1}') + echo \$SCALE_FACTOR > ${prefix}.scale_factor.txt + + bedtools \\ + genomecov \\ + -ibam $bam \\ + -bg \\ + -scale \$SCALE_FACTOR \\ + $pe \\ + $extend \\ + | sort -T '.' -k1,1 -k2,2n > ${prefix}.bedGraph + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/deseq2_qc.nf b/modules/local/deseq2_qc.nf new file mode 100644 index 000000000..84ff0d5f0 --- /dev/null +++ b/modules/local/deseq2_qc.nf @@ -0,0 +1,55 @@ +process DESEQ2_QC { + tag "$meta.id" + label 'process_medium' + + // (Bio)conda packages have intentionally not been pinned to a specific version + // This was to avoid the pipeline failing due to package conflicts whilst creating the environment when using -profile conda + conda (params.enable_conda ? "conda-forge::r-base bioconda::bioconductor-deseq2 bioconda::bioconductor-biocparallel bioconda::bioconductor-tximport bioconda::bioconductor-complexheatmap conda-forge::r-optparse conda-forge::r-ggplot2 conda-forge::r-rcolorbrewer conda-forge::r-pheatmap" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' : + 'quay.io/biocontainers/mulled-v2-8849acf39a43cdd6c839a369a74c0adc823e2f91:ab110436faf952a33575c64dd74615a84011450b-0' }" + + input: + tuple val(meta), path(counts) + path deseq2_pca_header + path deseq2_clustering_header + + output: + path "*.pdf" , optional:true, emit: pdf + path "*.RData" , optional:true, emit: rdata + path "*.rds" , optional:true, emit: rds + path "*pca.vals.txt" , optional:true, emit: pca_txt + path "*pca.vals_mqc.tsv" , optional:true, emit: pca_multiqc + path "*sample.dists.txt" , optional:true, emit: dists_txt + path "*sample.dists_mqc.tsv", optional:true, emit: dists_multiqc + path "*.log" , optional:true, emit: log + path "size_factors" , optional:true, emit: size_factors + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + deseq2_qc.r \\ + --count_file $counts \\ + --outdir ./ \\ + --outprefix $prefix \\ + --cores $task.cpus \\ + $args + + sed 's/deseq2_pca/deseq2_pca_${task.index}/g' <$deseq2_pca_header >tmp.txt + sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt + cat tmp.txt ${prefix}.pca.vals.txt > ${prefix}.pca.vals_mqc.tsv + + sed 's/deseq2_clustering/deseq2_clustering_${task.index}/g' <$deseq2_clustering_header >tmp.txt + sed -i -e 's/DESeq2 /${meta.id} DESeq2 /g' tmp.txt + cat tmp.txt ${prefix}.sample.dists.txt > ${prefix}.sample.dists_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + bioconductor-deseq2: \$(Rscript -e "library(DESeq2); cat(as.character(packageVersion('DESeq2')))") + END_VERSIONS + """ +} diff --git a/modules/local/frip_score.nf b/modules/local/frip_score.nf new file mode 100644 index 000000000..337b18c53 --- /dev/null +++ 
b/modules/local/frip_score.nf @@ -0,0 +1,31 @@ +process FRIP_SCORE { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0': + 'quay.io/biocontainers/mulled-v2-8186960447c5cb2faa697666dc1e6d919ad23f3e:3127fcae6b6bdaf8181e21a26ae61231030a9fcb-0' }" + + input: + tuple val(meta), path(bam), path(peak) + + output: + tuple val(meta), path("*.txt"), emit: txt + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + READS_IN_PEAKS=\$(intersectBed -a $bam -b $peak $args | awk -F '\t' '{sum += \$NF} END {print sum}') + samtools flagstat $bam > ${bam}.flagstat + grep 'mapped (' ${bam}.flagstat | grep -v "primary" | awk -v a="\$READS_IN_PEAKS" -v OFS='\t' '{print "${prefix}", a/\$1}' > ${prefix}.FRiP.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/genome_blacklist_regions.nf b/modules/local/genome_blacklist_regions.nf new file mode 100644 index 000000000..1a28af0f6 --- /dev/null +++ b/modules/local/genome_blacklist_regions.nf @@ -0,0 +1,41 @@ +/* + * Prepare genome intervals for filtering by removing regions in blacklist file + */ +process GENOME_BLACKLIST_REGIONS { + tag "$sizes" + + conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0': + 'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }" + + input: + path sizes + path blacklist + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + script: + def file_out = "${sizes.simpleName}.include_regions.bed" + if (blacklist) { + """ + sortBed -i $blacklist -g $sizes | complementBed -i stdin -g $sizes > $file_out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } else { + """ + awk '{print \$1, '0' , \$2}' OFS='\t' $sizes > $file_out + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + } +} diff --git a/modules/local/gtf2bed.nf b/modules/local/gtf2bed.nf new file mode 100644 index 000000000..1d306cee5 --- /dev/null +++ b/modules/local/gtf2bed.nf @@ -0,0 +1,28 @@ +process GTF2BED { + tag "$gtf" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::perl=5.26.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/perl:5.26.2': + 'quay.io/biocontainers/perl:5.26.2' }" + + input: + path gtf + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + """ + gtf2bed \\ + $gtf \\ + > ${gtf.baseName}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + perl: \$(echo \$(perl --version 2>&1) | sed 's/.*v\\(.*\\)) built.*/\\1/') + END_VERSIONS + """ +} diff --git a/modules/local/igv.nf b/modules/local/igv.nf new file mode 100644 index 000000000..213904b3e --- /dev/null +++ b/modules/local/igv.nf @@ -0,0 +1,46 @@ +/* + * Create IGV session file + */ +process IGV { + + conda (params.enable_conda ? 
"conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/python:3.8.3': + 'quay.io/biocontainers/python:3.8.3' }" + + input: + val aligner_dir + val peak_dir + path fasta + path ("${aligner_dir}/mergedLibrary/bigwig/*") + path ("${aligner_dir}/mergedLibrary/macs2/${peak_dir}/*") + path ("${aligner_dir}/mergedLibrary/macs2/${peak_dir}/consensus/*") + path ("mappings/*") + + output: + path "*files.txt" , emit: txt + path "*.xml" , emit: xml + path "versions.yml", emit: versions + + script: // scripts are bundled with the pipeline in nf-core/chipseq/bin/ + def consensus_dir = "${aligner_dir}/mergedLibrary/macs2/${peak_dir}/consensus/*" + """ + find * -type l -name "*.bigWig" -exec echo -e ""{}"\\t0,0,178" \\; > bigwig.igv.txt + find * -type l -name "*Peak" -exec echo -e ""{}"\\t0,0,178" \\; > peaks.igv.txt + # Avoid error when consensus not produced + find * -type l -name "*.bed" -exec echo -e ""{}"\\t0,0,178" \\; | { grep "^$consensus_dir" || test \$? = 1; } > consensus.igv.txt + + touch replace_paths.txt + if [ -d "mappings" ]; then + cat mappings/* > replace_paths.txt + fi + + cat *.igv.txt > igv_files_orig.txt + igv_files_to_session.py igv_session.xml igv_files_orig.txt replace_paths.txt ../../genome/${fasta.getName()} --path_prefix '../../' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/macs2_consensus.nf b/modules/local/macs2_consensus.nf new file mode 100644 index 000000000..ab9ef93a2 --- /dev/null +++ b/modules/local/macs2_consensus.nf @@ -0,0 +1,61 @@ +/* + * Consensus peaks across samples, create boolean filtering file, SAF file for featureCounts + */ +process MACS2_CONSENSUS { + tag "$meta.id" + label 'process_long' + + conda (params.enable_conda ? 
"conda-forge::biopython conda-forge::r-optparse=1.7.1 conda-forge::r-upsetr=1.4.0 bioconda::bedtools=2.30.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0': + 'quay.io/biocontainers/mulled-v2-2f48cc59b03027e31ead6d383fe1b8057785dd24:5d182f583f4696f4c4d9f3be93052811b383341f-0' }" + + input: + tuple val(meta), path(peaks) + + output: + tuple val(meta), path("*.bed") , emit: bed + tuple val(meta), path("*.saf") , emit: saf + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.antibody.txt") , emit: txt + tuple val(meta), path("*.boolean.txt") , emit: boolean_txt + tuple val(meta), path("*.intersect.txt"), emit: intersect_txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def prefix = task.ext.prefix ?: "${meta.id}" + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + def mergecols = params.narrow_peak ? (2..10).join(',') : (2..9).join(',') + def collapsecols = params.narrow_peak ? (['collapse']*9).join(',') : (['collapse']*8).join(',') + def expandparam = params.narrow_peak ? '--is_narrow_peak' : '' + """ + sort -T '.' 
-k1,1 -k2,2n ${peaks.collect{it.toString()}.sort().join(' ')} \\ + | mergeBed -c $mergecols -o $collapsecols > ${prefix}.txt + + macs2_merged_expand.py \\ + ${prefix}.txt \\ + ${peaks.collect{it.toString()}.sort().join(',').replaceAll("_peaks.${peak_type}","")} \\ + ${prefix}.boolean.txt \\ + --min_replicates $params.min_reps_consensus \\ + $expandparam + + awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$1, \$2, \$3, \$4, "0", "+" }' ${prefix}.boolean.txt > ${prefix}.bed + + echo -e "GeneID\tChr\tStart\tEnd\tStrand" > ${prefix}.saf + awk -v FS='\t' -v OFS='\t' 'FNR > 1 { print \$4, \$1, \$2, \$3, "+" }' ${prefix}.boolean.txt >> ${prefix}.saf + + plot_peak_intersect.r -i ${prefix}.boolean.intersect.txt -o ${prefix}.boolean.intersect.plot.pdf + + echo "${prefix}.bed\t${meta.id}/${prefix}.bed" > ${prefix}.antibody.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ + +} diff --git a/modules/local/multiqc.nf b/modules/local/multiqc.nf new file mode 100644 index 000000000..702b239f4 --- /dev/null +++ b/modules/local/multiqc.nf @@ -0,0 +1,72 @@ +process MULTIQC { + label 'process_medium' + + conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1': + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + + input: + path multiqc_config + path mqc_custom_config + path software_versions + path workflow_summary + + path ('fastqc/*') + path ('trimgalore/fastqc/*') + path ('trimgalore/*') + + path ('alignment/library/*') + path ('alignment/library/*') + path ('alignment/library/*') + + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/*') + path ('alignment/mergedLibrary/unfiltered/picard_metrics/*') + + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/*') + path ('alignment/mergedLibrary/filtered/picard_metrics/*') + + path ('preseq/*') + + path ('deeptools/*') + path ('deeptools/*') + + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + path ('phantompeakqualtools/*') + + path ('macs2/peaks/*') + path ('macs2/peaks/*') + path ('macs2/annotation/*') + path ('macs2/featurecounts/*') + + path ('deseq2/*') + path ('deseq2/*') + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + def custom_config = params.multiqc_config ? "--config $mqc_custom_config" : '' + """ + multiqc \\ + -f \\ + $args \\ + $custom_config \\ + . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_custom_peaks.nf b/modules/local/multiqc_custom_peaks.nf new file mode 100644 index 000000000..ebef7b13a --- /dev/null +++ b/modules/local/multiqc_custom_peaks.nf @@ -0,0 +1,29 @@ +process MULTIQC_CUSTOM_PEAKS { + tag "$meta.id" + conda (params.enable_conda ? 
"conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(peak), path(frip) + path peak_count_header + path frip_score_header + + output: + tuple val(meta), path("*.peak_count_mqc.tsv"), emit: count + tuple val(meta), path("*.FRiP_mqc.tsv") , emit: frip + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat $peak | wc -l | awk -v OFS='\t' '{ print "${prefix}", \$1 }' | cat $peak_count_header - > ${prefix}.peak_count_mqc.tsv + cat $frip_score_header $frip > ${prefix}.FRiP_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_custom_phantompeakqualtools.nf b/modules/local/multiqc_custom_phantompeakqualtools.nf new file mode 100644 index 000000000..4878e2c27 --- /dev/null +++ b/modules/local/multiqc_custom_phantompeakqualtools.nf @@ -0,0 +1,34 @@ +process MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS { + tag "$meta.id" + conda (params.enable_conda ? "conda-forge::r-base=3.5.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/r-base:3.5.1': + 'quay.io/biocontainers/r-base:3.5.1' }" + + input: + tuple val(meta), path(spp), path(rdata) + path nsc_header + path rsc_header + path correlation_header + + output: + tuple val(meta), path("*.spp_nsc_mqc.tsv") , emit: nsc + tuple val(meta), path("*.spp_rsc_mqc.tsv") , emit: rsc + tuple val(meta), path("*.spp_correlation_mqc.tsv"), emit: correlation + path "versions.yml" , emit: versions + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cp $correlation_header ${prefix}.spp_correlation_mqc.tsv + Rscript --max-ppsize=500000 -e "load('$rdata'); write.table(crosscorr\\\$cross.correlation, file=\\"${prefix}.spp_correlation_mqc.tsv\\", sep=",", quote=FALSE, row.names=FALSE, col.names=FALSE,append=TRUE)" + + awk -v OFS='\t' '{print "${meta.id}", \$9}' $spp | cat $nsc_header - > ${prefix}.spp_nsc_mqc.tsv + awk -v OFS='\t' '{print "${meta.id}", \$10}' $spp | cat $rsc_header - > ${prefix}.spp_rsc_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/plot_homer_annotatepeaks.nf b/modules/local/plot_homer_annotatepeaks.nf new file mode 100644 index 000000000..3375349a1 --- /dev/null +++ b/modules/local/plot_homer_annotatepeaks.nf @@ -0,0 +1,37 @@ +process PLOT_HOMER_ANNOTATEPEAKS { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path annos + path mqc_header + val suffix + + output: + path '*.txt' , emit: txt + path '*.pdf' , emit: pdf + path '*.tsv' , emit: tsv + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "annotatepeaks" + """ + plot_homer_annotatepeaks.r \\ + -i ${annos.join(',')} \\ + -s ${annos.join(',').replaceAll("${suffix}","")} \\ + -p $prefix \\ + $args + + find ./ -type f -name "*summary.txt" -exec cat {} \\; | cat $mqc_header - > ${prefix}.summary_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/plot_macs2_qc.nf b/modules/local/plot_macs2_qc.nf new file mode 100644 index 000000000..bbbf1ce81 --- /dev/null +++ b/modules/local/plot_macs2_qc.nf @@ -0,0 +1,31 @@ +process PLOT_MACS2_QC { + label 'process_medium' + + conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0': + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path peaks + + output: + path '*.txt' , emit: txt + path '*.pdf' , emit: pdf + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + def args = task.ext.args ?: '' + def peak_type = params.narrow_peak ? 'narrowPeak' : 'broadPeak' + """ + plot_macs2_qc.r \\ + -i ${peaks.join(',')} \\ + -s ${peaks.join(',').replaceAll("_peaks.${peak_type}","")} \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf new file mode 100644 index 000000000..5094e5a0f --- /dev/null +++ b/modules/local/samplesheet_check.nf @@ -0,0 +1,27 @@ +process SAMPLESHEET_CHECK { + tag "$samplesheet" + + conda (params.enable_conda ? "conda-forge::python=3.8.3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.8.3' : + 'quay.io/biocontainers/python:3.8.3' }" + + input: + path samplesheet + + output: + path '*.csv' , emit: csv + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/chipseq/bin/ + """ + check_samplesheet.py \\ + $samplesheet \\ + samplesheet.valid.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/star_align.nf b/modules/local/star_align.nf new file mode 100644 index 000000000..f960045f3 --- /dev/null +++ b/modules/local/star_align.nf @@ -0,0 +1,57 @@ +process STAR_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda (params.enable_conda ? "bioconda::star=2.6.1d" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/star:2.6.1d--0' : + 'quay.io/biocontainers/star:2.6.1d--0' }" + + input: + tuple val(meta), path(reads) + path index + + output: + tuple val(meta), path('*d.out.bam') , emit: bam + tuple val(meta), path('*Log.final.out') , emit: log_final + tuple val(meta), path('*Log.out') , emit: log_out + tuple val(meta), path('*Log.progress.out'), emit: log_progress + path "versions.yml" , emit: versions + + tuple val(meta), path('*sortedByCoord.out.bam') , optional:true, emit: bam_sorted + tuple val(meta), path('*toTranscriptome.out.bam'), optional:true, emit: bam_transcript + tuple val(meta), path('*Aligned.unsort.out.bam') , optional:true, emit: bam_unsorted + tuple val(meta), path('*fastq.gz') , optional:true, emit: fastq + tuple val(meta), path('*.tab') , optional:true, emit: tab + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def seq_center = params.seq_center ? 
"--outSAMattrRGline ID:$prefix 'CN:$params.seq_center' 'SM:$prefix'" : "--outSAMattrRGline ID:$prefix 'SM:$prefix'" + def out_sam_type = (args.contains('--outSAMtype')) ? '' : '--outSAMtype BAM Unsorted' + def mv_unsorted_bam = (args.contains('--outSAMtype BAM Unsorted SortedByCoordinate')) ? "mv ${prefix}.Aligned.out.bam ${prefix}.Aligned.unsort.out.bam" : '' + """ + STAR \\ + --genomeDir $index \\ + --readFilesIn $reads \\ + --runThreadN $task.cpus \\ + --outFileNamePrefix $prefix. \\ + $out_sam_type \\ + $seq_center \\ + $args + $mv_unsorted_bam + if [ -f ${prefix}.Unmapped.out.mate1 ]; then + mv ${prefix}.Unmapped.out.mate1 ${prefix}.unmapped_1.fastq + gzip ${prefix}.unmapped_1.fastq + fi + if [ -f ${prefix}.Unmapped.out.mate2 ]; then + mv ${prefix}.Unmapped.out.mate2 ${prefix}.unmapped_2.fastq + gzip ${prefix}.unmapped_2.fastq + fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ +} diff --git a/modules/local/star_genomegenerate.nf b/modules/local/star_genomegenerate.nf new file mode 100644 index 000000000..3cd4ff20c --- /dev/null +++ b/modules/local/star_genomegenerate.nf @@ -0,0 +1,58 @@ +process STAR_GENOMEGENERATE { + tag "$fasta" + label 'process_high' + + // Note: 2.7X indices incompatible with AWS iGenomes. + conda (params.enable_conda ? "bioconda::star=2.6.1d bioconda::samtools=1.10 conda-forge::gawk=5.1.0" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' : + 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:59cdd445419f14abac76b31dd0d71217994cbcc9-0' }" + + input: + path fasta + path gtf + + output: + path "star" , emit: index + path "versions.yml", emit: versions + + script: + def args = (task.ext.args ?: '').tokenize() + def memory = task.memory ? "--limitGenomeGenerateRAM ${task.memory.toBytes() - 100000000}" : '' + if (args.contains('--genomeSAindexNbases')) { + """ + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + $memory \\ + ${args.join(' ')} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } else { + """ + samtools faidx $fasta + NUM_BASES=`gawk '{sum = sum + \$2}END{if ((log(sum)/log(2))/2 - 1 > 14) {printf "%.0f", 14} else {printf "%.0f", (log(sum)/log(2))/2 - 1}}' ${fasta}.fai` + mkdir star + STAR \\ + --runMode genomeGenerate \\ + --genomeDir star/ \\ + --genomeFastaFiles $fasta \\ + --sjdbGTFfile $gtf \\ + --runThreadN $task.cpus \\ + --genomeSAindexNbases \$NUM_BASES \\ + $memory \\ + ${args.join(' ')} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + star: \$(STAR --version | sed -e "s/STAR_//g") + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/modules/bowtie2/align/main.nf new file mode 100644 index 000000000..c74e376f7 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/main.nf @@ -0,0 +1,71 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null) + container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ? 
+ "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" : + "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }" + + input: + tuple val(meta), path(reads) + path index + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz"), emit: fastq, optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 
'sort' : 'view' + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> ${prefix}.bowtie2.log \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/modules/bowtie2/align/meta.yml new file mode 100644 index 000000000..42ba0f964 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/align/meta.yml @@ -0,0 +1,62 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/modules/bowtie2/build/main.nf new file mode 100644 index 000000000..a4da62d07 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/main.nf @@ -0,0 +1,30 @@ +process BOWTIE2_BUILD { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' : + 'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }" + + input: + path fasta + + output: + path 'bowtie2' , emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bowtie2 + bowtie2-build $args --threads $task.cpus $fasta bowtie2/${fasta.baseName} + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/modules/bowtie2/build/meta.yml new file mode 100644 index 000000000..2da9a2171 --- /dev/null +++ b/modules/nf-core/modules/bowtie2/build/meta.yml @@ -0,0 +1,33 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/modules/bwa/index/main.nf b/modules/nf-core/modules/bwa/index/main.nf new file mode 100644 index 000000000..3affbf16b --- /dev/null +++ b/modules/nf-core/modules/bwa/index/main.nf @@ -0,0 +1,35 @@ +process BWA_INDEX { + tag "$fasta" + label 'process_high' + + conda (params.enable_conda ? 
"bioconda::bwa=0.7.17" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' : + 'quay.io/biocontainers/bwa:0.7.17--hed695b0_7' }" + + input: + path fasta + + output: + path "bwa" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir bwa + bwa \\ + index \\ + $args \\ + -p bwa/${fasta.baseName} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bwa/index/meta.yml b/modules/nf-core/modules/bwa/index/meta.yml new file mode 100644 index 000000000..2bbd81d9c --- /dev/null +++ b/modules/nf-core/modules/bwa/index/meta.yml @@ -0,0 +1,32 @@ +name: bwa_index +description: Create BWA index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: http://bio-bwa.sourceforge.net/ + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - fasta: + type: file + description: Input genome fasta file +output: + - index: + type: file + description: BWA genome index files + pattern: "*.{amb,ann,bwt,pac,sa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/modules/bwa/mem/main.nf b/modules/nf-core/modules/bwa/mem/main.nf new file mode 100644 index 000000000..f55af9443 --- /dev/null +++ b/modules/nf-core/modules/bwa/mem/main.nf @@ -0,0 +1,43 @@ +process BWA_MEM { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? "bioconda::bwa=0.7.17 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' : + 'quay.io/biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:8110a70be2bfe7f75a2ea7f2a89cda4cc7732095-0' }" + + input: + tuple val(meta), path(reads) + path index + val sort_bam + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/.amb//'` + + bwa mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwa: \$(echo \$(bwa 2>&1) | sed 's/^.*Version: //; s/Contact:.*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/bwa/mem/meta.yml b/modules/nf-core/modules/bwa/mem/meta.yml new file mode 100644 index 000000000..f84c5227b --- /dev/null +++ b/modules/nf-core/modules/bwa/mem/meta.yml @@ -0,0 +1,50 @@ +name: bwa_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: http://bio-bwa.sourceforge.net/ + documentation: http://bio-bwa.sourceforge.net/bwa.shtml + arxiv: arXiv:1303.3997 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively.
+ - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}" + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@jeremy1805" diff --git a/modules/nf-core/modules/chromap/chromap/main.nf b/modules/nf-core/modules/chromap/chromap/main.nf new file mode 100644 index 000000000..137f0340c --- /dev/null +++ b/modules/nf-core/modules/chromap/chromap/main.nf @@ -0,0 +1,95 @@ +process CHROMAP_CHROMAP { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::chromap=0.2.1 bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' : + 'quay.io/biocontainers/mulled-v2-1f09f39f20b1c4ee36581dc81cc323c70e661633:963e4fe6a85c548a4018585660aed79780a175d3-0' }" + + input: + tuple val(meta), path(reads) + path fasta + path index + path barcodes + path whitelist + path chr_order + path pairs_chr_order + + output: + tuple val(meta), path("*.bed.gz") , optional:true, emit: bed + tuple val(meta), path("*.bam") , optional:true, emit: bam + tuple val(meta), path("*.tagAlign.gz"), optional:true, emit: tagAlign + tuple val(meta), path("*.pairs.gz") , optional:true, emit: pairs + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + + def file_extension = args.contains("--SAM") ? 'sam' : args.contains("--TagAlign")? 'tagAlign' : args.contains("--pairs")? 
'pairs' : 'bed' + if (barcodes) { + args_list << "-b ${barcodes.join(',')}" + if (whitelist) { + args_list << "--barcode-whitelist $whitelist" + } + } + if (chr_order) { + args_list << "--chr-order $chr_order" + } + if (pairs_chr_order){ + args_list << "--pairs-natural-chr-order $pairs_chr_order" + } + def final_args = args_list.join(' ') + def compression_cmds = "gzip -n ${prefix}.${file_extension}" + if (args.contains("--SAM")) { + compression_cmds = """ + samtools view $args2 -@ $task.cpus -bh \\ + -o ${prefix}.bam ${prefix}.${file_extension} + rm ${prefix}.${file_extension} + """ + } + if (meta.single_end) { + """ + chromap \\ + $final_args \\ + -t $task.cpus \\ + -x $index \\ + -r $fasta \\ + -1 ${reads.join(',')} \\ + -o ${prefix}.${file_extension} + + $compression_cmds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } else { + """ + chromap \\ + $final_args \\ + -t $task.cpus \\ + -x $index \\ + -r $fasta \\ + -1 ${reads[0]} \\ + -2 ${reads[1]} \\ + -o ${prefix}.${file_extension} + + $compression_cmds + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/chromap/chromap/meta.yml b/modules/nf-core/modules/chromap/chromap/meta.yml new file mode 100644 index 000000000..a86fddc99 --- /dev/null +++ b/modules/nf-core/modules/chromap/chromap/meta.yml @@ -0,0 +1,88 @@ +name: chromap_chromap +description: | + Performs preprocessing and alignment of chromatin fastq files to + fasta reference files using chromap. 
+keywords: + - chromap + - alignment + - map + - fastq + - bam + - sam + - hi-c + - atac-seq + - chip-seq + - trimming + - duplicate removal +tools: + - chromap: + description: Fast alignment and preprocessing of chromatin profiles + homepage: https://github.com/haowenz/chromap + documentation: https://github.com/haowenz/chromap + tool_dev_url: https://github.com/haowenz/chromap + doi: "" + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - fasta: + type: file + description: | + The fasta reference file. + - index: + type: file + description: | + Chromap genome index files (*.index) + - barcodes: + type: file + description: | + Cell barcode files + - whitelist: + type: file + description: | + Cell barcode whitelist file + - chr_order: + type: file + description: | + Custom chromosome order + - pairs_chr_order: + type: file + description: | + Natural chromosome order for pairs flipping +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bed: + type: file + description: BED file + pattern: "*.bed.gz" + - bam: + type: file + description: BAM file + pattern: "*.bam" + - tagAlign: + type: file + description: tagAlign file + pattern: "*.tagAlign.gz" + - pairs: + type: file + description: pairs file + pattern: "*.pairs.gz" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/modules/chromap/index/main.nf b/modules/nf-core/modules/chromap/index/main.nf new file mode 100644 index 000000000..ee3706959 --- /dev/null +++ b/modules/nf-core/modules/chromap/index/main.nf @@ -0,0 +1,36 @@ +process CHROMAP_INDEX { + tag "$fasta" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::chromap=0.2.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/chromap:0.2.1--hd03093a_0' : + 'quay.io/biocontainers/chromap:0.2.1--hd03093a_0' }" + + input: + path fasta + + output: + path "*.index" , emit: index + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = fasta.baseName + """ + chromap \\ + -i \\ + $args \\ + -t $task.cpus \\ + -r $fasta \\ + -o ${prefix}.index + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + chromap: \$(echo \$(chromap --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/chromap/index/meta.yml b/modules/nf-core/modules/chromap/index/meta.yml new file mode 100644 index 000000000..6659221f6 --- /dev/null +++ b/modules/nf-core/modules/chromap/index/meta.yml @@ -0,0 +1,33 @@ +name: chromap_index +description: Indexes a fasta reference genome ready for chromatin profiling. 
+keywords: + - index + - fasta + - genome + - reference +tools: + - chromap: + description: Fast alignment and preprocessing of chromatin profiles + homepage: https://github.com/haowenz/chromap + documentation: https://github.com/haowenz/chromap + tool_dev_url: https://github.com/haowenz/chromap + doi: "" + licence: ["GPL v3"] + +input: + - fasta: + type: file + description: Fasta reference file. + +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - index: + type: file + description: Index file of the reference genome + pattern: "*.{index}" + +authors: + - "@mahesh-panchal" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf new file mode 100644 index 000000000..203e485ae --- /dev/null +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf @@ -0,0 +1,24 @@ +process CUSTOM_DUMPSOFTWAREVERSIONS { + label 'process_low' + + // Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container + conda (params.enable_conda ? 'bioconda::multiqc=1.13a' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' : + 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }" + + input: + path versions + + output: + path "software_versions.yml" , emit: yml + path "software_versions_mqc.yml", emit: mqc_yml + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + template 'dumpsoftwareversions.py' +} diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml new file mode 100644 index 000000000..60b546a01 --- /dev/null +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml @@ -0,0 +1,34 @@ +name: custom_dumpsoftwareversions +description: Custom module used to dump software versions within the nf-core pipeline template +keywords: + - custom + - version +tools: + - custom: + description: Custom module used to dump software versions within the nf-core pipeline template + homepage: https://github.com/nf-core/tools + documentation: https://github.com/nf-core/tools + licence: ["MIT"] +input: + - versions: + type: file + description: YML file containing software versions + pattern: "*.yml" + +output: + - yml: + type: file + description: Standard YML file containing software versions + pattern: "software_versions.yml" + - mqc_yml: + type: file + description: MultiQC custom content YML file containing software versions + pattern: "software_versions_mqc.yml" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@drpatelh" + - "@grst" diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py new file mode 100644 index 000000000..787bdb7b1 --- /dev/null +++ b/modules/nf-core/modules/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -0,0 +1,91 @@ 
+#!/usr/bin/env python + +import platform +from textwrap import dedent + +import yaml + + +def _make_versions_html(versions): + html = [ + dedent( + """\\ + <style> + #nf-core-versions tbody:nth-child(even) { + background-color: #f2f2f2; + } + </style> + <table class="table" style="width:100%" id="nf-core-versions"> + <thead> + <tr> + <th> Process Name </th> + <th> Software </th> + <th> Version </th> + </tr> + </thead> + """ + ) + ] + for process, tmp_versions in sorted(versions.items()): + html.append("<tbody>") + for i, (tool, version) in enumerate(sorted(tmp_versions.items())): + html.append( + dedent( + f"""\\ + <tr> + <td><samp>{process if (i == 0) else ''}</samp></td> + <td><samp>{tool}</samp></td> + <td><samp>{version}</samp></td> + </tr> + """ + ) + ) + html.append("</tbody>") + html.append("</table>") + return "\\n".join(html) + + +versions_this_module = {} +versions_this_module["${task.process}"] = { + "python": platform.python_version(), + "yaml": yaml.__version__, +} + +with open("$versions") as f: + versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + +# aggregate versions by the module name (derived from fully-qualified process name) +versions_by_module = {} +for process, process_versions in versions_by_process.items(): + module = process.split(":")[-1] + try: + if versions_by_module[module] != process_versions: + raise AssertionError( + "We assume that software versions are the same between all modules. " + "If you see this error-message it means you discovered an edge-case " + "and should open an issue in nf-core/tools. " + ) + except KeyError: + versions_by_module[module] = process_versions + +versions_by_module["Workflow"] = { + "Nextflow": "$workflow.nextflow.version", + "$workflow.manifest.name": "$workflow.manifest.version", +} + +versions_mqc = { + "id": "software_versions", + "section_name": "${workflow.manifest.name} Software Versions", + "section_href": "https://github.com/${workflow.manifest.name}", + "plot_type": "html", + "description": "are collected at run time from the software output.", + "data": _make_versions_html(versions_by_module), +} + +with open("software_versions.yml", "w") as f: + yaml.dump(versions_by_module, f, default_flow_style=False) +with open("software_versions_mqc.yml", "w") as f: + yaml.dump(versions_mqc, f, default_flow_style=False) + +with open("versions.yml", "w") as f: + yaml.dump(versions_this_module, f, default_flow_style=False) diff --git a/modules/nf-core/modules/custom/getchromsizes/main.nf b/modules/nf-core/modules/custom/getchromsizes/main.nf new file mode 100644 index 000000000..0eabf3a4c --- /dev/null +++ b/modules/nf-core/modules/custom/getchromsizes/main.nf @@ -0,0 +1,32 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ?
"bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + path fasta + + output: + path '*.sizes' , emit: sizes + path '*.fai' , emit: fai + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + custom: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/custom/getchromsizes/meta.yml b/modules/nf-core/modules/custom/getchromsizes/meta.yml new file mode 100644 index 000000000..ee6c25718 --- /dev/null +++ b/modules/nf-core/modules/custom/getchromsizes/meta.yml @@ -0,0 +1,38 @@ +name: custom_getchromsizes +description: Generates a two-column file of chromosome names and sizes and a FASTA index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + +input: + - fasta: + type: file + description: FASTA file + pattern: "*.{fasta}" + +output: + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" diff --git a/modules/nf-core/modules/deeptools/computematrix/main.nf
b/modules/nf-core/modules/deeptools/computematrix/main.nf new file mode 100644 index 000000000..96dfef3c6 --- /dev/null +++ b/modules/nf-core/modules/deeptools/computematrix/main.nf @@ -0,0 +1,39 @@ +process DEEPTOOLS_COMPUTEMATRIX { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bigwig) + path bed + + output: + tuple val(meta), path("*.mat.gz") , emit: matrix + tuple val(meta), path("*.mat.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + computeMatrix \\ + $args \\ + --regionsFileName $bed \\ + --scoreFileName $bigwig \\ + --outFileName ${prefix}.computeMatrix.mat.gz \\ + --outFileNameMatrix ${prefix}.computeMatrix.vals.mat.tab \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(computeMatrix --version | sed -e "s/computeMatrix //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/computematrix/meta.yml b/modules/nf-core/modules/deeptools/computematrix/meta.yml new file mode 100644 index 000000000..eaa990dd9 --- /dev/null +++ b/modules/nf-core/modules/deeptools/computematrix/meta.yml @@ -0,0 +1,58 @@ +name: deeptools_computematrix +description: calculates scores per genome regions for other deeptools plotting utilities +keywords: + - genome + - regions + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: 
https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - bigwig: + type: file + description: bigwig file containing genomic scores + pattern: "*.{bw,bigwig}" + - bed: + type: file + description: bed file containing genomic regions + pattern: "*.{bed}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - matrix: + type: file + description: | + gzipped matrix file needed by the plotHeatmap and plotProfile + deeptools utilities + pattern: "*.{computeMatrix.mat.gz}" + - table: + type: file + description: | + tabular file containing the scores of the generated matrix + pattern: "*.{computeMatrix.vals.mat.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@jeremy1805" + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/main.nf b/modules/nf-core/modules/deeptools/plotfingerprint/main.nf new file mode 100644 index 000000000..83613be7b --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotfingerprint/main.nf @@ -0,0 +1,41 @@ +process DEEPTOOLS_PLOTFINGERPRINT { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(bams), path(bais) + + output: + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.raw.txt") , emit: matrix + tuple val(meta), path("*.qcmetrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extend = (meta.single_end && params.fragment_size > 0) ? "--extendReads ${params.fragment_size}" : '' + """ + plotFingerprint \\ + $args \\ + $extend \\ + --bamfiles ${bams.join(' ')} \\ + --plotFile ${prefix}.plotFingerprint.pdf \\ + --outRawCounts ${prefix}.plotFingerprint.raw.txt \\ + --outQualityMetrics ${prefix}.plotFingerprint.qcmetrics.txt \\ + --numberOfProcessors $task.cpus + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotFingerprint --version | sed -e "s/plotFingerprint //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml b/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml new file mode 100644 index 000000000..07c257480 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotfingerprint/meta.yml @@ -0,0 +1,61 @@ +name: deeptools_plotfingerprint +description: plots cumulative reads coverages by BAM file +keywords: + - plot + - fingerprint + - cumulative coverage + - bam +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - bam: + type: file + description: One or more BAM files + pattern: "*.{bam}" + - bais: + type: file + description: Corresponding BAM file indexes + pattern: "*.bam.bai" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotFingerprint.pdf}" + - matrix: + type: file + description: | + Output file summarizing the read counts per bin + pattern: "*.{plotFingerprint.raw.txt}" + - metrics: + type: file + description: | + file containing BAM file quality metrics + pattern: "*.{qcmetrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotheatmap/main.nf b/modules/nf-core/modules/deeptools/plotheatmap/main.nf new file mode 100644 index 000000000..1e402e391 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotheatmap/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTHEATMAP { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotHeatmap \\ + $args \\ + --matrixFile $matrix \\ + --outFileName ${prefix}.plotHeatmap.pdf \\ + --outFileNameMatrix ${prefix}.plotHeatmap.mat.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotHeatmap --version | sed -e "s/plotHeatmap //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotheatmap/meta.yml b/modules/nf-core/modules/deeptools/plotheatmap/meta.yml new file mode 100644 index 000000000..ea206fb67 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotheatmap/meta.yml @@ -0,0 +1,55 @@ +name: deeptools_plotheatmap +description: plots values produced by deeptools_computematrix as a heatmap +keywords: + - plot + - heatmap + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - matrix: + type: file + description: | + gzipped matrix file produced by deeptools_ + computematrix deeptools utility + pattern: "*.{mat.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotHeatmap.pdf}" + - matrix: + type: file + description: | + File containing the matrix of values + used to generate the heatmap + pattern: "*.{plotHeatmap.mat.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/deeptools/plotprofile/main.nf b/modules/nf-core/modules/deeptools/plotprofile/main.nf new file mode 100644 index 000000000..d83a94938 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotprofile/main.nf @@ -0,0 +1,36 @@ +process DEEPTOOLS_PLOTPROFILE { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? 'bioconda::deeptools=3.5.1' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/deeptools:3.5.1--py_0' : + 'quay.io/biocontainers/deeptools:3.5.1--py_0' }" + + input: + tuple val(meta), path(matrix) + + output: + tuple val(meta), path("*.pdf"), emit: pdf + tuple val(meta), path("*.tab"), emit: table + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plotProfile \\ + $args \\ + --matrixFile $matrix \\ + --outFileName ${prefix}.plotProfile.pdf \\ + --outFileNameData ${prefix}.plotProfile.tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deeptools: \$(plotProfile --version | sed -e "s/plotProfile //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/deeptools/plotprofile/meta.yml b/modules/nf-core/modules/deeptools/plotprofile/meta.yml new file mode 100644 index 000000000..795fda444 --- /dev/null +++ b/modules/nf-core/modules/deeptools/plotprofile/meta.yml @@ -0,0 +1,55 @@ +name: 
deeptools_plotprofile +description: plots values produced by deeptools_computematrix as a profile plot +keywords: + - plot + - profile + - scores + - matrix +tools: + - deeptools: + description: A set of user-friendly tools for normalization and visualization of deep-sequencing data + homepage: + documentation: https://deeptools.readthedocs.io/en/develop/index.html + tool_dev_url: https://github.com/deeptools/deepTools + doi: "10.1093/nar/gku365" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - matrix: + type: file + description: | + gzipped matrix file produced by deeptools_ + computematrix deeptools utility + pattern: "*.{mat.gz}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - pdf: + type: file + description: | + Output figure containing resulting plot + pattern: "*.{plotProfile.pdf}" + - table: + type: file + description: | + File containing the matrix of values + used to generate the profile + pattern: "*.{plotProfile.tab}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" + - "@drpatelh" + - "@joseespinosa" diff --git a/modules/nf-core/modules/fastqc/main.nf b/modules/nf-core/modules/fastqc/main.nf new file mode 100644 index 000000000..05730368b --- /dev/null +++ b/modules/nf-core/modules/fastqc/main.nf @@ -0,0 +1,59 @@ +process FASTQC { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::fastqc=0.11.9" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/fastqc:0.11.9--0' : + 'quay.io/biocontainers/fastqc:0.11.9--0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.zip") , emit: zip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Add soft-links to original FastQs for consistent naming in pipeline + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + fastqc $args --threads $task.cpus ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + fastqc $args --threads $task.cpus ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.html + touch ${prefix}.zip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqc: \$( fastqc --version | sed -e "s/FastQC v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/fastqc/meta.yml b/modules/nf-core/modules/fastqc/meta.yml new file mode 100644 index 000000000..4da5bb5a0 --- /dev/null +++ b/modules/nf-core/modules/fastqc/meta.yml @@ -0,0 +1,52 @@ +name: fastqc +description: Run FastQC on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: | + FastQC gives general quality metrics about your reads. + It provides information about the quality score distribution + across your reads, the per base sequence content (%A/C/G/T). 
+ You get information about adapter contamination and other + overrepresented sequences. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ + documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ + licence: ["GPL-2.0-only"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: FastQC report + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@grst" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/modules/gffread/main.nf b/modules/nf-core/modules/gffread/main.nf new file mode 100644 index 000000000..7c575c97f --- /dev/null +++ b/modules/nf-core/modules/gffread/main.nf @@ -0,0 +1,33 @@ +process GFFREAD { + tag "$gff" + label 'process_low' + + conda (params.enable_conda ? "bioconda::gffread=0.12.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' : + 'quay.io/biocontainers/gffread:0.12.1--h8b12597_0' }" + + input: + path gff + + output: + path "*.gtf" , emit: gtf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${gff.baseName}" + """ + gffread \\ + $gff \\ + $args \\ + -o ${prefix}.gtf + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gffread: \$(gffread --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gffread/meta.yml b/modules/nf-core/modules/gffread/meta.yml new file mode 100644 index 000000000..203357477 --- /dev/null +++ b/modules/nf-core/modules/gffread/meta.yml @@ -0,0 +1,33 @@ +name: gffread +description: Validate, filter, convert and perform various other operations on GFF files +keywords: + - gff + - conversion + - validation +tools: + - gffread: + description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more. + homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread + tool_dev_url: https://github.com/gpertea/gffread + doi: 10.12688/f1000research.23297.1 + licence: ["MIT"] + +input: + - gff: + type: file + description: A reference file in either the GFF3, GFF2 or GTF format. 
+ pattern: "*.{gff, gtf}" + +output: + - gtf: + type: file + description: GTF file resulting from the conversion of the GFF input file + pattern: "*.{gtf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@emiller88" diff --git a/modules/nf-core/modules/gunzip/main.nf b/modules/nf-core/modules/gunzip/main.nf new file mode 100644 index 000000000..703670495 --- /dev/null +++ b/modules/nf-core/modules/gunzip/main.nf @@ -0,0 +1,44 @@ +process GUNZIP { + tag "$archive" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + gunzip \\ + -f \\ + $args \\ + $archive + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/gunzip/meta.yml b/modules/nf-core/modules/gunzip/meta.yml new file mode 100644 index 000000000..4d2ebc84e --- /dev/null +++ b/modules/nf-core/modules/gunzip/meta.yml @@ -0,0 +1,34 @@ +name: gunzip +description: Compresses and decompresses files. 
+keywords: + - gunzip + - compression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/modules/homer/annotatepeaks/main.nf b/modules/nf-core/modules/homer/annotatepeaks/main.nf new file mode 100644 index 000000000..9056a5ab5 --- /dev/null +++ b/modules/nf-core/modules/homer/annotatepeaks/main.nf @@ -0,0 +1,41 @@ +process HOMER_ANNOTATEPEAKS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::homer=4.11" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/homer:4.11--pl526hc9558a2_3' : + 'quay.io/biocontainers/homer:4.11--pl526hc9558a2_3' }" + + input: + tuple val(meta), path(peak) + path fasta + path gtf + + output: + tuple val(meta), path("*annotatePeaks.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '4.11' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. 
+ """ + annotatePeaks.pl \\ + $peak \\ + $fasta \\ + $args \\ + -gtf $gtf \\ + -cpu $task.cpus \\ + > ${prefix}.annotatePeaks.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + homer: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/homer/annotatepeaks/meta.yml b/modules/nf-core/modules/homer/annotatepeaks/meta.yml new file mode 100644 index 000000000..b815e975d --- /dev/null +++ b/modules/nf-core/modules/homer/annotatepeaks/meta.yml @@ -0,0 +1,48 @@ +name: homer_annotatepeaks +description: Annotate peaks with HOMER suite +keywords: + - annotations + - peaks + - bed +tools: + - homer: + description: | + HOMER (Hypergeometric Optimization of Motif EnRichment) is a suite of tools for Motif Discovery and next-gen sequencing analysis. + documentation: http://homer.ucsd.edu/homer/ + doi: 10.1016/j.molcel.2010.05.004. + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - peaks: + type: file + description: The peak files in bed format + pattern: "*.bed" + - fasta: + type: file + description: Fasta file of reference genome + pattern: "*.fasta" + - gtf: + type: file + description: GTF file of reference genome + pattern: "*.gtf" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - annotated_peaks: + type: file + description: The annotated peaks + pattern: "*annotatePeaks.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/modules/khmer/uniquekmers/main.nf b/modules/nf-core/modules/khmer/uniquekmers/main.nf new file mode 100644 index 000000000..8ad89a628 --- /dev/null +++ b/modules/nf-core/modules/khmer/uniquekmers/main.nf @@ -0,0 +1,38 @@ +process KHMER_UNIQUEKMERS { + tag "$fasta" + label 'process_low' + + conda (params.enable_conda ? "bioconda::khmer=3.0.0a3" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/khmer:3.0.0a3--py37haa7609a_2' : + 'quay.io/biocontainers/khmer:3.0.0a3--py37haa7609a_2' }" + + input: + path fasta + val kmer_size + + output: + path "report.txt" , emit: report + path "kmers.txt" , emit: kmers + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + unique-kmers.py \\ + -k $kmer_size \\ + -R report.txt \\ + $args \\ + $fasta + + grep ^number report.txt | sed 's/^.*:.[[:blank:]]//g' > kmers.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + khmer: \$( unique-kmers.py --version 2>&1 | grep ^khmer | sed 's/^khmer //;s/ .*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/khmer/uniquekmers/meta.yml b/modules/nf-core/modules/khmer/uniquekmers/meta.yml new file mode 100644 index 000000000..31405cc16 --- /dev/null +++ b/modules/nf-core/modules/khmer/uniquekmers/meta.yml @@ -0,0 +1,42 @@ +name: "khmer_uniquekmers" +description: +keywords: + - khmer + - k-mer + - effective genome size + +tools: + - "khmer": + description: khmer k-mer counting library + homepage: https://github.com/dib-lab/khmer + documentation: 
https://khmer.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/dib-lab/khmer + doi: "10.12688/f1000research.6924.1" + licence: ["BSD License"] + +input: + - fasta: + type: file + description: fasta file + pattern: "*.{fa,fasta}" + - kmer_size: + type: value + description: k-mer size to use + pattern: "[0-9]+" + +output: + - report: + type: file + description: Text file containing unique-kmers.py execution report + pattern: "report.txt" + - kmers: + type: file + description: Text file containing number of kmers + pattern: "kmers.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@JoseEspinosa" diff --git a/modules/nf-core/modules/macs2/callpeak/main.nf b/modules/nf-core/modules/macs2/callpeak/main.nf new file mode 100644 index 000000000..9aaf97a93 --- /dev/null +++ b/modules/nf-core/modules/macs2/callpeak/main.nf @@ -0,0 +1,53 @@ +process MACS2_CALLPEAK { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::macs2=2.2.7.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/macs2:2.2.7.1--py38h4a8c8d9_3' : + 'quay.io/biocontainers/macs2:2.2.7.1--py38h4a8c8d9_3' }" + + input: + tuple val(meta), path(ipbam), path(controlbam) + val macs2_gsize + + output: + tuple val(meta), path("*.{narrowPeak,broadPeak}"), emit: peak + tuple val(meta), path("*.xls") , emit: xls + path "versions.yml" , emit: versions + + tuple val(meta), path("*.gappedPeak"), optional:true, emit: gapped + tuple val(meta), path("*.bed") , optional:true, emit: bed + tuple val(meta), path("*.bdg") , optional:true, emit: bdg + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args_list = args.tokenize() + def format = meta.single_end ? 'BAM' : 'BAMPE' + def control = controlbam ? 
"--control $controlbam" : '' + if(args_list.contains('--format')){ + def id = args_list.findIndexOf{it=='--format'} + format = args_list[id+1] + args_list.remove(id+1) + args_list.remove(id) + } + """ + macs2 \\ + callpeak \\ + ${args_list.join(' ')} \\ + --gsize $macs2_gsize \\ + --format $format \\ + --name $prefix \\ + --treatment $ipbam \\ + $control + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + macs2: \$(macs2 --version | sed -e "s/macs2 //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/macs2/callpeak/meta.yml b/modules/nf-core/modules/macs2/callpeak/meta.yml new file mode 100644 index 000000000..982bc5b24 --- /dev/null +++ b/modules/nf-core/modules/macs2/callpeak/meta.yml @@ -0,0 +1,63 @@ +name: macs2_callpeak +description: Peak calling of enriched genomic regions of ChIP-seq and ATAC-seq experiments +keywords: + - alignment + - atac-seq + - chip-seq + - peak-calling +tools: + - macs2: + description: Model Based Analysis for ChIP-Seq data + homepage: None + documentation: https://docs.csc.fi/apps/macs2/ + tool_dev_url: https://github.com/macs3-project/MACS + doi: "https://doi.org/10.1101/496521" + licence: ["BSD"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ipbam: + type: file + description: The ChIP-seq treatment file + - controlbam: + type: file + description: The control file + - macs2_gsize: + type: string + description: Effective genome size. It can be 1.0e+9 or 1000000000, or shortcuts:'hs' for human (2.7e9), + 'mm' for mouse (1.87e9), 'ce' for C. 
elegans (9e7) and 'dm' for fruitfly (1.2e8) + +output: + - versions: + type: file + description: File containing software version + pattern: "versions.yml" + - peak: + type: file + description: BED file containing annotated peaks + pattern: "*.{narrowPeak,broadPeak}" + - xls: + type: file + description: xls file containing annotated peaks + pattern: "*.xls" + - gapped: + type: file + description: Optional BED file containing gapped peak + pattern: "*.gappedPeak" + - bed: + type: file + description: Optional BED file containing peak summits locations for every peak + pattern: "*.bed" + - bdg: + type: file + description: Optional bedGraph files for input and treatment input samples + pattern: "*.bdg" + +authors: + - "@ntoda03" + - "@JoseEspinosa" + - "@jianhong" diff --git a/modules/nf-core/modules/phantompeakqualtools/main.nf b/modules/nf-core/modules/phantompeakqualtools/main.nf new file mode 100644 index 000000000..43fca6836 --- /dev/null +++ b/modules/nf-core/modules/phantompeakqualtools/main.nf @@ -0,0 +1,37 @@ +process PHANTOMPEAKQUALTOOLS { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::phantompeakqualtools=1.2.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/phantompeakqualtools:1.2.2--0' : + 'quay.io/biocontainers/phantompeakqualtools:1.2.2--0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.out") , emit: spp + tuple val(meta), path("*.pdf") , emit: pdf + tuple val(meta), path("*.Rdata"), emit: rdata + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + RUN_SPP=`which run_spp.R` + Rscript $args -e "library(caTools); source(\\"\$RUN_SPP\\")" -c="$bam" -savp="${prefix}.spp.pdf" -savd="${prefix}.spp.Rdata" -out="${prefix}.spp.out" $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + phantompeakqualtools: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/phantompeakqualtools/meta.yml b/modules/nf-core/modules/phantompeakqualtools/meta.yml new file mode 100644 index 000000000..6488500d4 --- /dev/null +++ b/modules/nf-core/modules/phantompeakqualtools/meta.yml @@ -0,0 +1,60 @@ +name: "phantompeakqualtools" + +description: +keywords: + - "ChIP-Seq" + - "QC" + - "phantom peaks" +tools: + - "phantompeakqualtools": + description: | + "This package computes informative enrichment and quality measures + for ChIP-seq/DNase-seq/FAIRE-seq/MNase-seq data. It can also be used + to obtain robust estimates of the predominant fragment length or + characteristic tag shift values in these assays." 
+ homepage: "None" + documentation: "https://github.com/kundajelab/phantompeakqualtools" + tool_dev_url: "https://github.com/kundajelab/phantompeakqualtools" + doi: "https://doi.org/10.1101/gr.136184.111" + licence: "['BSD-3-clause']" + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - spp: + type: file + description: | + A ChIP-Seq Processing Pipeline file containing + peakshift/phantomPeak results + pattern: "*.{out}" + - pdf: + type: file + description: A pdf containing save cross-correlation plots + pattern: "*.{pdf}" + - rdata: + type: file + description: Rdata file containing the R session + pattern: "*.{Rdata}" + +authors: + - "@drpatelh" + - "@Emiller88" + - "@JoseEspinosa" diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf new file mode 100644 index 000000000..63f4e8729 --- /dev/null +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/main.nf @@ -0,0 +1,67 @@ +process PICARD_COLLECTMULTIPLEMETRICS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + path fai + + output: + tuple val(meta), path("*_metrics"), emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + CollectMultipleMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 
'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml new file mode 100644 index 000000000..c11b02cfa --- /dev/null +++ b/modules/nf-core/modules/picard/collectmultiplemetrics/meta.yml @@ -0,0 +1,54 @@ +name: picard_collectmultiplemetrics +description: Collect multiple metrics from a BAM file +keywords: + - alignment + - metrics + - statistics + - insert + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" + - fasta: + type: file + description: Genome fasta file + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - pdf: + type: file + description: PDF plots of metrics + pattern: "*.{pdf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/picard/markduplicates/main.nf b/modules/nf-core/modules/picard/markduplicates/main.nf new file mode 100644 index 000000000..4e559fea0 --- /dev/null +++ b/modules/nf-core/modules/picard/markduplicates/main.nf @@ -0,0 +1,58 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam") , emit: bam + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.metrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + picard \\ + -Xmx${avail_mem}g \\ + MarkDuplicates \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.bam \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.bai + touch ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/picard/markduplicates/meta.yml b/modules/nf-core/modules/picard/markduplicates/meta.yml new file mode 100644 index 000000000..842817bcd --- /dev/null +++ b/modules/nf-core/modules/picard/markduplicates/meta.yml @@ -0,0 +1,52 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file + pattern: "*.{bam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + - bai: + type: file + description: An optional BAM index file. 
If desired, --CREATE_INDEX must be passed as a flag + pattern: "*.{bai}" + - metrics: + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@projectoriented" diff --git a/modules/nf-core/modules/picard/mergesamfiles/main.nf b/modules/nf-core/modules/picard/mergesamfiles/main.nf new file mode 100644 index 000000000..cccf4d3d5 --- /dev/null +++ b/modules/nf-core/modules/picard/mergesamfiles/main.nf @@ -0,0 +1,52 @@ +process PICARD_MERGESAMFILES { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::picard=2.27.4" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:2.27.4--hdfd78af_0' : + 'quay.io/biocontainers/picard:2.27.4--hdfd78af_0' }" + + input: + tuple val(meta), path(bams) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_files = bams.sort() + def avail_mem = 3 + if (!task.memory) { + log.info '[Picard MergeSamFiles] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + if (bam_files.size() > 1) { + """ + picard \\ + -Xmx${avail_mem}g \\ + MergeSamFiles \\ + $args \\ + ${'--INPUT '+bam_files.join(' --INPUT ')} \\ + --OUTPUT ${prefix}.bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$( echo \$(picard MergeSamFiles --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + } else { + """ + ln -s ${bam_files[0]} ${prefix}.bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$( echo \$(picard MergeSamFiles --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/picard/mergesamfiles/meta.yml b/modules/nf-core/modules/picard/mergesamfiles/meta.yml new file mode 100644 index 000000000..5f07ecd0a --- /dev/null +++ b/modules/nf-core/modules/picard/mergesamfiles/meta.yml @@ -0,0 +1,41 @@ +name: picard_mergesamfiles +description: Merges multiple BAM files into a single file +keywords: + - merge + - alignment + - bam + - sam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: List of BAM files + pattern: "*.{bam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Merged BAM file + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/preseq/lcextrap/main.nf b/modules/nf-core/modules/preseq/lcextrap/main.nf new file mode 100644 index 000000000..97261557e --- /dev/null +++ b/modules/nf-core/modules/preseq/lcextrap/main.nf @@ -0,0 +1,40 @@ +process PRESEQ_LCEXTRAP { + tag "$meta.id" + label 'process_medium' + label 'error_ignore' + + conda (params.enable_conda ? "bioconda::preseq=3.1.2" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/preseq:3.1.2--h445547b_2': + 'quay.io/biocontainers/preseq:3.1.2--h445547b_2' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.lc_extrap.txt"), emit: lc_extrap + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-pe' + """ + preseq \\ + lc_extrap \\ + $args \\ + $paired_end \\ + -output ${prefix}.lc_extrap.txt \\ + $bam + cp .command.err ${prefix}.command.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + preseq: \$(echo \$(preseq 2>&1) | sed 's/^.*Version: //; s/Usage:.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/preseq/lcextrap/meta.yml b/modules/nf-core/modules/preseq/lcextrap/meta.yml new file mode 100644 index 000000000..f1be05a2f --- /dev/null +++ b/modules/nf-core/modules/preseq/lcextrap/meta.yml @@ -0,0 +1,48 @@ +name: preseq_lcextrap +description: Software for predicting library complexity and genome coverage in high-throughput sequencing +keywords: + - preseq + - library + - complexity +tools: + - preseq: + description: Software for predicting library complexity and genome coverage in high-throughput sequencing + homepage: http://smithlabresearch.org/software/preseq/ + documentation: http://smithlabresearch.org/wp-content/uploads/manual.pdf + tool_dev_url: https://github.com/smithlabcode/preseq + doi: "" + licence: ["GPL"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - lc_extrap: + type: file + description: File containing output of Preseq lcextrap + pattern: "*.{lc_extrap.txt}" + - log: + type: file + description: Log file containing stderr produced by Preseq + pattern: "*.{log}" + +authors: + - "@drpatelh" + - "@Emiller88" diff --git a/modules/nf-core/modules/samtools/flagstat/main.nf b/modules/nf-core/modules/samtools/flagstat/main.nf new file mode 100644 index 000000000..03ec2dcf4 --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/main.nf @@ -0,0 +1,35 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/flagstat/meta.yml b/modules/nf-core/modules/samtools/flagstat/meta.yml new file mode 100644 index 000000000..952690639 --- /dev/null +++ b/modules/nf-core/modules/samtools/flagstat/meta.yml @@ -0,0 +1,49 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - 
counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/idxstats/main.nf b/modules/nf-core/modules/samtools/idxstats/main.nf new file mode 100644 index 000000000..4b2454198 --- /dev/null +++ b/modules/nf-core/modules/samtools/idxstats/main.nf @@ -0,0 +1,35 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/idxstats/meta.yml b/modules/nf-core/modules/samtools/idxstats/meta.yml new file mode 100644 index 000000000..3710ab882 --- /dev/null +++ b/modules/nf-core/modules/samtools/idxstats/meta.yml @@ -0,0 +1,50 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/samtools/index/main.nf b/modules/nf-core/modules/samtools/index/main.nf new file mode 100644 index 000000000..e04e63e85 --- /dev/null +++ b/modules/nf-core/modules/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/index/meta.yml b/modules/nf-core/modules/samtools/index/meta.yml new file mode 100644 index 000000000..e5cadbc24 --- /dev/null +++ b/modules/nf-core/modules/samtools/index/meta.yml @@ -0,0 +1,53 @@ 
+name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/modules/samtools/sort/main.nf b/modules/nf-core/modules/samtools/sort/main.nf new file mode 100644 index 000000000..b4fc1cbe9 --- /dev/null +++ b/modules/nf-core/modules/samtools/sort/main.nf @@ -0,0 +1,42 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(bam) + + output: + tuple val(meta), path("*.bam"), emit: bam + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools sort $args -@ $task.cpus -o ${prefix}.bam -T $prefix $bam + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/sort/meta.yml b/modules/nf-core/modules/samtools/sort/meta.yml new file mode 100644 index 000000000..a820c55a3 --- /dev/null +++ b/modules/nf-core/modules/samtools/sort/meta.yml @@ -0,0 +1,44 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" diff --git a/modules/nf-core/modules/samtools/stats/main.nf b/modules/nf-core/modules/samtools/stats/main.nf new file mode 100644 index 000000000..89b92d79e --- /dev/null +++ b/modules/nf-core/modules/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_low' + + conda (params.enable_conda ? "bioconda::samtools=1.15.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.15.1--h1170115_0' : + 'quay.io/biocontainers/samtools:1.15.1--h1170115_0' }" + + input: + tuple val(meta), path(input), path(input_index) + path fasta + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/samtools/stats/meta.yml b/modules/nf-core/modules/samtools/stats/meta.yml new file mode 100644 index 000000000..cac50b1c0 --- /dev/null +++ b/modules/nf-core/modules/samtools/stats/meta.yml @@ -0,0 +1,53 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - fasta: + type: optional file + description: Reference file the CRAM was created with + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" diff --git a/modules/nf-core/modules/subread/featurecounts/main.nf b/modules/nf-core/modules/subread/featurecounts/main.nf new file mode 100644 index 000000000..18e2a92bb --- /dev/null +++ b/modules/nf-core/modules/subread/featurecounts/main.nf @@ -0,0 +1,47 @@ +process SUBREAD_FEATURECOUNTS { + tag "$meta.id" + label 'process_medium' + + conda (params.enable_conda ? "bioconda::subread=2.0.1" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/subread:2.0.1--hed695b0_0' : + 'quay.io/biocontainers/subread:2.0.1--hed695b0_0' }" + + input: + tuple val(meta), path(bams), path(annotation) + + output: + tuple val(meta), path("*featureCounts.txt") , emit: counts + tuple val(meta), path("*featureCounts.txt.summary"), emit: summary + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired_end = meta.single_end ? 
'' : '-p' + + def strandedness = 0 + if (meta.strandedness == 'forward') { + strandedness = 1 + } else if (meta.strandedness == 'reverse') { + strandedness = 2 + } + """ + featureCounts \\ + $args \\ + $paired_end \\ + -T $task.cpus \\ + -a $annotation \\ + -s $strandedness \\ + -o ${prefix}.featureCounts.txt \\ + ${bams.join(' ')} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + subread: \$( echo \$(featureCounts -v 2>&1) | sed -e "s/featureCounts v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/subread/featurecounts/meta.yml b/modules/nf-core/modules/subread/featurecounts/meta.yml new file mode 100644 index 000000000..cf02f1ea7 --- /dev/null +++ b/modules/nf-core/modules/subread/featurecounts/meta.yml @@ -0,0 +1,52 @@ +name: subread_featurecounts +description: Count reads that map to genomic features +keywords: + - counts + - fasta + - genome + - reference + +tools: + - featurecounts: + description: featureCounts is a highly efficient general-purpose read summarization program that counts mapped reads for genomic features such as genes, exons, promoter, gene bodies, genomic bins and chromosomal locations. It can be used to count both RNA-seq and genomic DNA-seq reads. + homepage: http://bioinf.wehi.edu.au/featureCounts/ + documentation: http://bioinf.wehi.edu.au/subread-package/SubreadUsersGuide.pdf + doi: "10.1093/bioinformatics/btt656" + licence: ["GPL v3"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/SAM file containing read alignments + pattern: "*.{bam}" + - annotation: + type: file + description: Genomic features annotation in GTF or SAF + pattern: "*.{gtf,saf}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - counts: + type: file + description: Counts of reads mapping to features + pattern: "*featureCounts.txt" + - summary: + type: file + description: Summary log file + pattern: "*.featureCounts.txt.summary" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + +authors: + - "@ntoda03" diff --git a/modules/nf-core/modules/trimgalore/main.nf b/modules/nf-core/modules/trimgalore/main.nf new file mode 100644 index 000000000..3a3fca904 --- /dev/null +++ b/modules/nf-core/modules/trimgalore/main.nf @@ -0,0 +1,86 @@ +process TRIMGALORE { + tag "$meta.id" + label 'process_high' + + conda (params.enable_conda ? 'bioconda::trim-galore=0.6.7' : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/trim-galore:0.6.7--hdfd78af_0' : + 'quay.io/biocontainers/trim-galore:0.6.7--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*{trimmed,val}*.fq.gz"), emit: reads + tuple val(meta), path("*report.txt") , emit: log + path "versions.yml" , emit: versions + + tuple val(meta), path("*unpaired*.fq.gz") , emit: unpaired, optional: true + tuple val(meta), path("*.html") , emit: html , optional: true + tuple val(meta), path("*.zip") , emit: zip , optional: true + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + // Calculate number of --cores for TrimGalore based on value of task.cpus + // See: https://github.com/FelixKrueger/TrimGalore/blob/master/Changelog.md#version-060-release-on-1-mar-2019 + // See: https://github.com/nf-core/atacseq/pull/65 + def cores = 1 + if (task.cpus) { + cores = (task.cpus as int) - 4 + if (meta.single_end) cores = (task.cpus as int) - 3 + if (cores < 1) cores = 1 + if (cores > 4) cores = 4 + } + + // Clipping presets have to be evaluated in the context of SE/PE + def c_r1 = 
params.clip_r1 > 0 ? "--clip_r1 ${params.clip_r1}" : '' + def c_r2 = params.clip_r2 > 0 ? "--clip_r2 ${params.clip_r2}" : '' + def tpc_r1 = params.three_prime_clip_r1 > 0 ? "--three_prime_clip_r1 ${params.three_prime_clip_r1}" : '' + def tpc_r2 = params.three_prime_clip_r2 > 0 ? "--three_prime_clip_r2 ${params.three_prime_clip_r2}" : '' + + // Added soft-links to original fastqs for consistent naming in MultiQC + def prefix = task.ext.prefix ?: "${meta.id}" + if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -s $reads ${prefix}.fastq.gz + trim_galore \\ + $args \\ + --cores $cores \\ + --gzip \\ + $c_r1 \\ + $tpc_r1 \\ + ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } else { + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -s ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -s ${reads[1]} ${prefix}_2.fastq.gz + trim_galore \\ + $args \\ + --cores $cores \\ + --paired \\ + --gzip \\ + $c_r1 \\ + $c_r2 \\ + $tpc_r1 \\ + $tpc_r2 \\ + ${prefix}_1.fastq.gz \\ + ${prefix}_2.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + trimgalore: \$(echo \$(trim_galore --version 2>&1) | sed 's/^.*version //; s/Last.*\$//') + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + } +} diff --git a/modules/nf-core/modules/trimgalore/meta.yml b/modules/nf-core/modules/trimgalore/meta.yml new file mode 100644 index 000000000..439f566df --- /dev/null +++ b/modules/nf-core/modules/trimgalore/meta.yml @@ -0,0 +1,64 @@ +name: trimgalore +description: Trim FastQ files using Trim Galore! 
+keywords: + - trimming + - adapters + - sequencing adapters + - fastq +tools: + - trimgalore: + description: | + A wrapper tool around Cutadapt and FastQC to consistently apply quality + and adapter trimming to FastQ files, with some extra functionality for + MspI-digested RRBS-type (Reduced Representation Bisulfite-Seq) libraries. + homepage: https://www.bioinformatics.babraham.ac.uk/projects/trim_galore/ + documentation: https://github.com/FelixKrueger/TrimGalore/blob/master/Docs/Trim_Galore_User_Guide.md + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input adapter trimmed FastQ files of size 1 and 2 for + single-end and paired-end data, respectively. + pattern: "*.{fq.gz}" + - unpaired: + type: file + description: | + FastQ files containing unpaired reads from read 1 or read 2 + pattern: "*unpaired*.fq.gz" + - html: + type: file + description: FastQC report (optional) + pattern: "*_{fastqc.html}" + - zip: + type: file + description: FastQC report archive (optional) + pattern: "*_{fastqc.zip}" + - log: + type: file + description: Trim Galore! 
trimming report + pattern: "*_{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@FelixKrueger" diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf b/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf new file mode 100644 index 000000000..b18b190ad --- /dev/null +++ b/modules/nf-core/modules/ucsc/bedgraphtobigwig/main.nf @@ -0,0 +1,37 @@ +process UCSC_BEDGRAPHTOBIGWIG { + tag "$meta.id" + label 'process_medium' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda (params.enable_conda ? "bioconda::ucsc-bedgraphtobigwig=377" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ucsc-bedgraphtobigwig:377--h446ed27_1' : + 'quay.io/biocontainers/ucsc-bedgraphtobigwig:377--h446ed27_1' }" + + input: + tuple val(meta), path(bedgraph) + path sizes + + output: + tuple val(meta), path("*.bigWig"), emit: bigwig + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '377' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. + """ + bedGraphToBigWig \\ + $bedgraph \\ + $sizes \\ + ${prefix}.bigWig + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ucsc: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml b/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml new file mode 100644 index 000000000..1be1a3b7f --- /dev/null +++ b/modules/nf-core/modules/ucsc/bedgraphtobigwig/meta.yml @@ -0,0 +1,46 @@ +name: ucsc_bedgraphtobigwig +description: Convert a bedGraph file to bigWig format. 
+keywords: + - bedgraph + - bigwig +tools: + - ucsc: + description: Convert a bedGraph file to bigWig format. + homepage: None + documentation: None + tool_dev_url: None + doi: "" + licence: ["varies; see http://genome.ucsc.edu/license"] + +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bedgraph: + type: file + description: bedGraph file + pattern: "*.{bedGraph}" + - sizes: + type: file + description: chromosome sizes file + pattern: "*.{sizes}" + +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - bigwig: + type: file + description: bigWig file + pattern: "*.{bigWig}" + +authors: + - "@drpatelh" diff --git a/modules/nf-core/modules/untar/main.nf b/modules/nf-core/modules/untar/main.nf new file mode 100644 index 000000000..29ab10a50 --- /dev/null +++ b/modules/nf-core/modules/untar/main.nf @@ -0,0 +1,53 @@ +process UNTAR { + tag "$archive" + label 'process_low' + + conda (params.enable_conda ? "conda-forge::sed=4.7" : null) + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$untar"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + untar = archive.toString() - '.tar.gz' + + """ + mkdir output + + tar \\ + -C output --strip-components 1 \\ + -xzvf \\ + $args \\ + $archive \\ + $args2 + + mv output ${untar} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + untar = archive.toString() - '.tar.gz' + """ + touch $untar + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/modules/untar/meta.yml b/modules/nf-core/modules/untar/meta.yml new file mode 100644 index 000000000..d426919bd --- /dev/null +++ b/modules/nf-core/modules/untar/meta.yml @@ -0,0 +1,38 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: file + description: + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index 71f62859d..e7c2b71d1 100644 --- a/nextflow.config +++ b/nextflow.config @@ -1,206 +1,272 @@ /* - * ------------------------------------------------- - * nf-core/chipseq Nextflow config file - * ------------------------------------------------- - * Default config options for all environments. - */ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + nf-core/chipseq Nextflow config file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Default config options for all compute environments +---------------------------------------------------------------------------------------- +*/ // Global default params, used in configs params { + // Input options + input = null + seq_center = null + fragment_size = 200 + fingerprint_bins = 500000 + read_length = null - // Options: Generic - input = './design.csv' - single_end = false - seq_center = false - fragment_size = 200 - fingerprint_bins = 500000 - - // Options: References - genome = false - save_reference = false - - // Options: Trimming - clip_r1 = 0 - clip_r2 = 0 - three_prime_clip_r1 = 0 - three_prime_clip_r2 = 0 - trim_nextseq = 0 - skip_trimming = false - save_trimmed = false - - // Options: Alignments - bwa_min_score = false - keep_dups = false - keep_multi_map = false - save_align_intermeds = false - - // Options: Peaks - narrow_peak = false - broad_cutoff = 0.1 - macs_fdr = false - macs_pvalue = false - min_reps_consensus = 1 - save_macs_pileup = false - skip_peak_qc = false - skip_peak_annotation = false - skip_consensus_peaks = false - - // Options: Differential analysis - deseq2_vst = false - skip_diff_analysis = false - - // Options: QC - skip_fastqc 
= false - skip_picard_metrics = false - skip_preseq = false - skip_plot_profile = false - skip_plot_fingerprint = false - skip_spp = false - skip_igv = false - skip_multiqc = false - - // Options: Config - multiqc_config = false - bamtools_filter_pe_config = "$baseDir/assets/bamtools_filter_pe.json" - bamtools_filter_se_config = "$baseDir/assets/bamtools_filter_se.json" - - // Options: Custom config - custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = false - config_profile_contact = false - config_profile_url = false - - // Options: Other - help = false - outdir = './results' - publish_dir_mode = 'copy' - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false - max_multiqc_email_size = 25.MB - tracedir = "${params.outdir}/pipeline_info" - email = false - email_on_fail = false - plaintext_email = false - monochrome_logs = false - name = false - hostnames = false - clusterOptions = false - - // Defaults only, expecting to be overwritten - max_memory = 128.GB - max_cpus = 16 - max_time = 240.h + // References + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes' + igenomes_ignore = false + save_reference = false -} + // Options: Trimming + clip_r1 = null + clip_r2 = null + three_prime_clip_r1 = null + three_prime_clip_r2 = null + trim_nextseq = null + skip_trimming = false + save_trimmed = false + + // Options: Alignment + aligner = 'bwa' + bwa_min_score = null + keep_dups = false + keep_multi_map = false + save_align_intermeds = false + save_unaligned = false + + // Options: Peaks + narrow_peak = false + broad_cutoff = 0.1 + macs_fdr = null + macs_pvalue = null + min_reps_consensus = 1 + save_macs_pileup = false + skip_peak_qc = false + skip_peak_annotation = false + skip_consensus_peaks = false + + // Options: DESeq2 QC + deseq2_vst = true + skip_deseq2_qc = false + + // Options: QC + skip_qc = false + skip_fastqc = false 
+ skip_picard_metrics = false + skip_preseq = false + skip_plot_profile = false + skip_plot_fingerprint = false + skip_spp = false + skip_igv = false + skip_multiqc = false + + // Options: Config + bamtools_filter_pe_config = "$projectDir/assets/bamtools_filter_pe.json" + bamtools_filter_se_config = "$projectDir/assets/bamtools_filter_se.json" + + // MultiQC options + multiqc_config = null + multiqc_title = null + max_multiqc_email_size = '25.MB' -// Container slug. Stable releases should specify release tag! -// Developmental code should specify :dev -process.container = 'nfcore/chipseq:1.2.2' + // Boilerplate options + outdir = null + tracedir = "${params.outdir}/pipeline_info" + publish_dir_mode = 'copy' + email = null + email_on_fail = null + plaintext_email = false + monochrome_logs = false + help = false + validate_params = true + show_hidden_params = false + schema_ignore_params = 'genomes' + enable_conda = false + + // Config options + custom_config_version = 'master' + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + config_profile_description = null + config_profile_contact = null + config_profile_url = null + config_profile_name = null + + // Max resource options + // Defaults only, expecting to be overwritten + max_memory = '128.GB' + max_cpus = 16 + max_time = '240.h' + +} // Load base.config by default for all pipelines includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" + includeConfig "${params.custom_config_base}/nfcore_custom.config" } catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") + System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") } +// Load nf-core/chipseq custom profiles from different institutions. 
+// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs! +// try { +// includeConfig "${params.custom_config_base}/pipeline/chipseq.config" +// } catch (Exception e) { +// System.err.println("WARNING: Could not load nf-core/config/chipseq profiles: ${params.custom_config_base}/pipeline/chipseq.config") +// } + profiles { - conda { process.conda = "$baseDir/environment.yml" } - debug { process.beforeScript = 'echo $HOSTNAME' } - docker { - docker.enabled = true - // Avoid this error: - // WARNING: Your kernel does not support swap limit capabilities or the cgroup is not mounted. Memory limited without swap. - // Testing this in nf-core after discussion here https://github.com/nf-core/tools/pull/351 - // once this is established and works well, nextflow might implement this behavior as new default. - docker.runOptions = '-u \$(id -u):\$(id -g)' - } - singularity { - singularity.enabled = true - singularity.autoMounts = true - } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + debug { process.beforeScript = 'echo $HOSTNAME' } + conda { + params.enable_conda = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + mamba { + params.enable_conda = true + conda.useMamba = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + docker { + docker.enabled = true + docker.userEmulation = true + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + singularity { + singularity.enabled = true + singularity.autoMounts = true + docker.enabled = false + podman.enabled = false + shifter.enabled = false + charliecloud.enabled = false + } + podman { + podman.enabled = true + docker.enabled = false + singularity.enabled = false + 
shifter.enabled = false + charliecloud.enabled = false + } + shifter { + shifter.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + charliecloud.enabled = false + } + charliecloud { + charliecloud.enabled = true + docker.enabled = false + singularity.enabled = false + podman.enabled = false + shifter.enabled = false + } + gitpod { + executor.name = 'local' + executor.cpus = 16 + executor.memory = 60.GB + } + test { includeConfig 'conf/test.config' } + test_full { includeConfig 'conf/test_full.config' } } // Load igenomes.config if required if (!params.igenomes_ignore) { - includeConfig 'conf/igenomes.config' + includeConfig 'conf/igenomes.config' +} else { + params.genomes = [:] } -// Increase time available to build conda environment -conda { createTimeout = "60 min" } - // Export these variables to prevent local Python/R libraries from conflicting with those in the container +// The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. +// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. 
+ env { - PYTHONNOUSERSITE = 1 - R_PROFILE_USER = "/.Rprofile" - R_ENVIRON_USER = "/.Renviron" + PYTHONNOUSERSITE = 1 + R_PROFILE_USER = "/.Rprofile" + R_ENVIRON_USER = "/.Renviron" + JULIA_DEPOT_PATH = "/usr/local/share/julia" } // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] +def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { - enabled = true - file = "${params.tracedir}/execution_timeline.html" + enabled = true + file = "${params.tracedir}/execution_timeline_${trace_timestamp}.html" } report { - enabled = true - file = "${params.tracedir}/execution_report.html" + enabled = true + file = "${params.tracedir}/execution_report_${trace_timestamp}.html" } trace { - enabled = true - file = "${params.tracedir}/execution_trace.txt" + enabled = true + file = "${params.tracedir}/execution_trace_${trace_timestamp}.txt" } dag { - enabled = true - file = "${params.tracedir}/pipeline_dag.svg" + enabled = true + file = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html" } manifest { - name = 'nf-core/chipseq' - author = 'Philip Ewels' - homePage = 'https://github.com/nf-core/chipseq' - description = 'ChIP-seq peak-calling and differential analysis pipeline.' - mainScript = 'main.nf' - nextflowVersion = '>=19.10.0' - version = '1.2.2' + name = 'nf-core/chipseq' + author = 'Espinosa-Carrasco J, Patel H, Wang C, Ewels P' + homePage = 'https://github.com/nf-core/chipseq' + description = 'ChIP-seq peak-calling and differential analysis pipeline.' 
+ mainScript = 'main.nf' + nextflowVersion = '!>=21.10.3' + version = '2.0.0' } +// Load modules.config for DSL2 module specific options +includeConfig 'conf/modules.config' + // Function to ensure that resource requirements don't go beyond // a maximum limit def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj + if (type == 'memory') { + try { + if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) + return params.max_memory as nextflow.util.MemoryUnit + else + return obj + } catch (all) { + println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'time') { + try { + if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) + return params.max_time as nextflow.util.Duration + else + return obj + } catch (all) { + println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" + return obj + } + } else if (type == 'cpus') { + try { + return Math.min( obj, params.max_cpus as int ) + } catch (all) { + println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" + return obj + } } - } } diff --git a/nextflow_schema.json b/nextflow_schema.json index 8409c9d85..2a5f445c9 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -1,5 +1,5 @@ { - "$schema": "https://json-schema.org/draft-07/schema", + "$schema": "http://json-schema.org/draft-07/schema", "$id": "https://raw.githubusercontent.com/nf-core/chipseq/master/nextflow_schema.json", "title": "nf-core/chipseq pipeline parameters", "description": "ChIP-seq peak-calling and differential analysis pipeline.", @@ -8,88 +8,129 @@ "input_output_options": { "title": "Input/output options", "type": "object", + "fa_icon": "fas fa-terminal", + "description": "Define where the pipeline should find input data and save output data.", + "required": ["outdir"], "properties": { "input": { "type": "string", + "format": "file-path", + "mimetype": "text/csv", + "pattern": "^\\S+\\.csv$", + "schema": "assets/schema_input.json", "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/chipseq/docs/usage#--input).", + "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row. See [usage docs](https://nf-co.re/chipseq/docs/usage#introduction).", "fa_icon": "fas fa-file-csv" }, - "single_end": { - "type": "boolean", - "description": "Specifies that the input is single-end reads.", - "fa_icon": "fas fa-align-center", - "default": false, - "help_text": "By default, the pipeline expects paired-end data. 
If you have single-end data, specify this parameter on the command line when you launch the pipeline. It is not possible to run a mixture of single-end and paired-end files in one run." - }, "fragment_size": { "type": "integer", - "default": 0, "description": "Estimated fragment size used to extend single-end reads.", "fa_icon": "fas fa-chart-area", - "help_text": "" + "default": 200 }, "seq_center": { "type": "string", "description": "Sequencing center information to be added to read group of BAM files.", "fa_icon": "fas fa-synagogue" }, + "read_length": { + "type": "integer", + "description": "Read length used to calculate MACS2 genome size for peak calling if `--macs_gsize` isn't provided.", + "fa_icon": "fas fa-chart-area", + "enum": [50, 75, 100, 150, 200] + }, "outdir": { "type": "string", - "description": "Path to the output directory where the results will be saved.", - "default": "./results", - "fa_icon": "fas fa-folder-open", - "help_text": "" + "format": "directory-path", + "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", + "fa_icon": "fas fa-folder-open" }, "email": { "type": "string", "description": "Email address for completion summary.", "fa_icon": "fas fa-envelope", - "help_text": "An email address to send a summary email to when the pipeline is completed.", + "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$" + }, + "multiqc_title": { + "type": "string", + "description": "MultiQC report title. 
Printed as page header, used for filename if not otherwise specified.", + "fa_icon": "fas fa-file-signature" } - }, - "required": [ - "input" - ], - "fa_icon": "fas fa-terminal" + } }, "reference_genome_options": { "title": "Reference genome options", "type": "object", + "fa_icon": "fas fa-dna", + "description": "Reference genome related files and options required for the workflow.", "properties": { "genome": { "type": "string", "description": "Name of iGenomes reference.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`." + "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." }, "fasta": { "type": "string", - "description": "Path to Fasta reference file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "Path to FASTA genome file.", + "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have the appropriate alignment index available this will be generated for you automatically. 
Combine with `--save_reference` to save alignment index for future runs.", "fa_icon": "far fa-file-code" }, "gtf": { "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gtf(\\.gz)?$", "description": "Path to GTF annotation file.", - "fa_icon": "fas fa-file-invoice", + "fa_icon": "fas fa-code-branch", "help_text": "This parameter is *mandatory* if `--genome` is not specified." }, + "gff": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "fa_icon": "fas fa-code-branch", + "description": "Path to GFF3 annotation file.", + "help_text": "This parameter must be specified if `--genome` or `--gtf` are not specified." + }, "bwa_index": { "type": "string", - "description": "Full path to directory containing BWA index including base name. i.e. `/path/to/index/genome.fa`.", + "description": "Path to directory or tar.gz archive for pre-built BWA index.", + "fa_icon": "fas fa-bezier-curve" + }, + "bowtie2_index": { + "type": "string", + "format": "path", "fa_icon": "fas fa-bezier-curve", - "help_text": "" + "description": "Path to directory or tar.gz archive for pre-built Bowtie2 index." + }, + "chromap_index": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built Chromap index." + }, + "star_index": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-bezier-curve", + "description": "Path to directory or tar.gz archive for pre-built STAR index." }, "gene_bed": { "type": "string", - "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified.", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", "fa_icon": "fas fa-procedures", - "help_text": "" + "description": "Path to BED file containing gene intervals. This will be created from the GTF file if not specified." 
}, "macs_gsize": { - "type": "string", + "type": "number", "description": "Effective genome size parameter required by MACS2.", "help_text": "[Effective genome size](https://github.com/taoliu/MACS#-g--gsize) parameter required by MACS2. If using an iGenomes reference these have been provided when `--genome` is set as *GRCh37*, *GRCh38*, *GRCm38*, *WBcel235*, *BDGP6*, *R64-1-1*, *EF2*, *hg38*, *hg19* and *mm10*. For other genomes, if this parameter is not specified then the MACS2 peak-calling and differential analysis will be skipped.", "fa_icon": "fas fa-arrows-alt-h" @@ -102,97 +143,93 @@ }, "save_reference": { "type": "boolean", - "default": false, "description": "If generated by the pipeline save the BWA index in the results directory.", "help_text": "If the BWA index is generated by the pipeline use this parameter to save it to your results folder. These can then be used for future pipeline runs, reducing processing times.", "fa_icon": "fas fa-save" }, "igenomes_base": { "type": "string", + "format": "directory-path", "description": "Directory / URL base for iGenomes references.", - "default": "s3://ngi-igenomes/igenomes/", + "default": "s3://ngi-igenomes/igenomes", "fa_icon": "fas fa-cloud-download-alt", - "hidden": true, - "help_text": "" + "hidden": true }, "igenomes_ignore": { "type": "boolean", "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, - "default": false, "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." 
} - }, - "fa_icon": "fas fa-dna" + } }, "adapter_trimming_options": { "title": "Adapter trimming options", "type": "object", + "fa_icon": "fas fa-cut", + "description": "Options to adjust adapter trimming criteria.", "properties": { "clip_r1": { "type": "integer", - "default": 0, "description": "Instructs Trim Galore to remove bp from the 5' end of read 1 (or single-end reads).", - "fa_icon": "fas fa-cut", - "help_text": "" + "fa_icon": "fas fa-cut" }, "clip_r2": { "type": "integer", - "default": 0, "description": "Instructs Trim Galore to remove bp from the 5' end of read 2 (paired-end reads only).", - "fa_icon": "fas fa-cut", - "help_text": "" + "fa_icon": "fas fa-cut" }, "three_prime_clip_r1": { "type": "integer", - "default": 0, "description": "Instructs Trim Galore to remove bp from the 3' end of read 1 AFTER adapter/quality trimming has been performed.", "fa_icon": "fas fa-cut" }, "three_prime_clip_r2": { "type": "integer", - "default": 0, "description": "Instructs Trim Galore to remove bp from the 3' end of read 2 AFTER adapter/quality trimming has been performed.", "fa_icon": "fas fa-cut" }, "trim_nextseq": { "type": "integer", - "default": 0, "description": "Instructs Trim Galore to apply the --nextseq=X option, to trim based on quality after removing poly-G tails.", "help_text": "This enables the option Cutadapt `--nextseq-trim=3'CUTOFF` option via Trim Galore, which will set a quality cutoff (that is normally given with -q instead), but qualities of G bases are ignored. 
This trimming is in common for the NextSeq- and NovaSeq-platforms, where basecalls without any signal are called as high-quality G bases.", "fa_icon": "fas fa-cut" }, "skip_trimming": { "type": "boolean", - "default": false, "description": "Skip the adapter trimming step.", "help_text": "Use this if your input FastQ files have already been trimmed outside of the workflow or if you're very confident that there is no adapter contamination in your data.", "fa_icon": "fas fa-fast-forward" }, "save_trimmed": { "type": "boolean", - "default": false, "description": "Save the trimmed FastQ files in the results directory.", "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete.", "fa_icon": "fas fa-save" } - }, - "fa_icon": "fas fa-cut" + } }, "alignment_options": { "title": "Alignment options", "type": "object", + "fa_icon": "fas fa-map-signs", + "description": "Options to adjust parameters and filtering criteria for read alignments.", "properties": { + "aligner": { + "type": "string", + "default": "bwa", + "description": "Specifies the alignment algorithm to use - available options are 'bwa', 'bowtie2', 'chromap' and 'star'.", + "fa_icon": "fas fa-map-signs", + "enum": ["bwa", "bowtie2", "chromap", "star"] + }, "keep_dups": { "type": "boolean", - "default": false, "description": "Duplicate reads are not filtered from alignments.", "fa_icon": "fas fa-cart-arrow-down" }, "keep_multi_map": { "type": "boolean", - "default": false, "description": "Reads mapping to multiple locations are not filtered from alignments.", "fa_icon": "fas fa-cart-arrow-down" }, @@ -203,37 +240,40 @@ }, "save_align_intermeds": { "type": "boolean", - "default": false, "description": "Save the intermediate BAM files from the alignment step.", "help_text": "By default, intermediate BAM files will not be saved. 
The final BAM files created after the appropriate filtering step are always saved to limit storage usage. Set this parameter to also save other intermediate BAM files.", "fa_icon": "fas fa-save" }, + "save_unaligned": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Where possible, save unaligned reads from either STAR, HISAT2 or Salmon to the results directory.", + "help_text": "This may either be in the form of FastQ or BAM files depending on the options available for that particular tool." + }, "bamtools_filter_pe_config": { "type": "string", - "default": "$baseDir/assets/bamtools_filter_pe.json", + "default": "$projectDir/assets/bamtools_filter_pe.json", "hidden": true, "description": "BAMTools JSON file with custom filters for paired-end data.", - "fa_icon": "fas fa-cog", - "help_text": "" + "fa_icon": "fas fa-cog" }, "bamtools_filter_se_config": { "type": "string", - "default": "$baseDir/assets/bamtools_filter_se.json", + "default": "$projectDir/assets/bamtools_filter_se.json", "hidden": true, "description": "BAMTools JSON file with custom filters for single-end data.", - "fa_icon": "fas fa-cog", - "help_text": "" + "fa_icon": "fas fa-cog" } - }, - "fa_icon": "fas fa-map-signs" + } }, "peak_calling_options": { "title": "Peak calling options", "type": "object", + "fa_icon": "fas fa-chart-area", + "description": "Options to adjust peak calling criteria.", "properties": { "narrow_peak": { "type": "boolean", - "default": false, "description": "Run MACS2 in narrowPeak mode.", "help_text": "MACS2 is run by default with the [`--broad`](https://github.com/taoliu/MACS#--broad) flag. 
Specify this flag to call peaks in narrowPeak mode.", "fa_icon": "fas fa-arrows-alt-h" @@ -258,125 +298,109 @@ "type": "integer", "default": 1, "description": "Number of biological replicates required from a given condition for a peak to contribute to a consensus peak.", - "help_text": "If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a \"reproducible\" set of consensus peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded.", + "help_text": "If you are confident you have good reproducibility amongst your replicates then you can increase the value of this parameter to create a 'reproducible' set of consensus peaks. For example, a value of 2 will mean peaks that have been called in at least 2 replicates will contribute to the consensus set of peaks, and as such peaks that are unique to a given replicate will be discarded.", "fa_icon": "fas fa-sort-numeric-down" }, "save_macs_pileup": { "type": "boolean", - "default": false, "description": "Instruct MACS2 to create bedGraph files normalised to signal per million reads.", - "fa_icon": "fas fa-save", - "help_text": "" + "fa_icon": "fas fa-save" }, "skip_peak_qc": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip MACS2 peak QC plot generation.", - "default": false + "description": "Skip MACS2 peak QC plot generation." }, "skip_peak_annotation": { "type": "boolean", "fa_icon": "fas fa-fast-forward", - "description": "Skip annotation of MACS2 and consensus peaks with HOMER.", - "default": false + "description": "Skip annotation of MACS2 and consensus peaks with HOMER." 
}, "skip_consensus_peaks": { "type": "boolean", - "default": false, "description": "Skip consensus peak generation, annotation and counting.", - "fa_icon": "fas fa-fast-forward", - "help_text": "" - } - }, - "fa_icon": "fas fa-chart-area" - }, - "differential_analysis_options": { - "title": "Differential analysis options", - "type": "object", - "properties": { - "deseq2_vst": { - "type": "boolean", - "default": false, - "description": "Use vst transformation instead of rlog with DESeq2.", - "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", - "fa_icon": "fas fa-dolly" - }, - "skip_diff_analysis": { - "type": "boolean", - "default": false, - "description": "Skip differential accessibility analysis.", - "fa_icon": "fas fa-fast-forward", - "help_text": "" + "fa_icon": "fas fa-fast-forward" } - }, - "fa_icon": "fas fa-not-equal" + } }, "process_skipping_options": { "title": "Process skipping options", "type": "object", + "fa_icon": "fas fa-fast-forward", + "description": "Options to skip various steps within the workflow.", "properties": { "skip_fastqc": { "type": "boolean", - "default": false, "description": "Skip FastQC.", "fa_icon": "fas fa-fast-forward" }, "skip_picard_metrics": { "type": "boolean", - "default": false, "description": "Skip Picard CollectMultipleMetrics.", "fa_icon": "fas fa-fast-forward" }, "skip_preseq": { "type": "boolean", - "default": false, "description": "Skip Preseq.", "fa_icon": "fas fa-fast-forward" }, + "deseq2_vst": { + "type": "boolean", + "description": "Use vst transformation instead of rlog with DESeq2.", + "help_text": "See [DESeq2 docs](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#data-transformations-and-visualization).", + "fa_icon": "fas fa-dolly", + "default": true + }, "skip_plot_profile": { "type": "boolean", - "default": false, "description": "Skip deepTools plotProfile.", "fa_icon": 
"fas fa-fast-forward" }, "skip_plot_fingerprint": { "type": "boolean", - "default": false, "description": "Skip deepTools plotFingerprint.", "fa_icon": "fas fa-fast-forward" }, "skip_spp": { "type": "boolean", - "default": false, "description": "Skip Phantompeakqualtools.", "fa_icon": "fas fa-fast-forward" }, + "skip_deseq2_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip DESeq2 PCA and heatmap plotting." + }, "skip_igv": { "type": "boolean", - "default": false, "description": "Skip IGV.", "fa_icon": "fas fa-fast-forward" }, "skip_multiqc": { "type": "boolean", - "default": false, "description": "Skip MultiQC.", "fa_icon": "fas fa-fast-forward" + }, + "skip_qc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip all QC steps except for MultiQC." } - }, - "fa_icon": "fas fa-fast-forward" + } }, "institutional_config_options": { "title": "Institutional config options", "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", "properties": { "custom_config_version": { "type": "string", "description": "Git commit id for Institutional configs.", "default": "master", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "" + "fa_icon": "fas fa-users-cog" }, "custom_config_base": { "type": "string", @@ -386,44 +410,42 @@ "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. 
If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", "fa_icon": "fas fa-users-cog" }, - "hostnames": { + "config_profile_name": { "type": "string", - "description": "Institutional configs hostname.", + "description": "Institutional config name.", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "" + "fa_icon": "fas fa-users-cog" }, "config_profile_description": { "type": "string", "description": "Institutional config description.", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "" + "fa_icon": "fas fa-users-cog" }, "config_profile_contact": { "type": "string", "description": "Institutional config contact information.", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "" + "fa_icon": "fas fa-users-cog" }, "config_profile_url": { "type": "string", "description": "Institutional config URL link.", "hidden": true, - "fa_icon": "fas fa-users-cog", - "help_text": "" + "fa_icon": "fas fa-users-cog" } - }, - "fa_icon": "fas fa-university" + } }, "max_job_request_options": { "title": "Max job request options", "type": "object", + "fa_icon": "fab fa-acquisitions-incorporated", + "description": "Set the top limit for requested resources for any single job.", + "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you cannot _increase_ the resources requested by any job using these options. For that you will need your own configuration file. 
See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { "max_cpus": { "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", + "description": "Maximum number of CPUs that can be requested for any single job.", "default": 16, "fa_icon": "fas fa-microchip", "hidden": true, @@ -434,6 +456,7 @@ "description": "Maximum amount of memory that can be requested for any single job.", "default": "128.GB", "fa_icon": "fas fa-memory", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "hidden": true, "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" }, @@ -442,28 +465,23 @@ "description": "Maximum amount of time that can be requested for any single job.", "default": "240.h", "fa_icon": "far fa-clock", + "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" } - }, - "fa_icon": "fab fa-acquisitions-incorporated" + } }, "generic_options": { "title": "Generic options", "type": "object", + "fa_icon": "fas fa-file-import", + "description": "Less common options for the pipeline, typically set in a config file.", + "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.", "properties": { "help": { "type": "boolean", "description": "Display help text.", "fa_icon": "fas fa-question-circle", - "default": false, - "hidden": true - }, - "fingerprint_bins": { - "type": "integer", - "default": 500000, - "description": "Number of genomic bins to use when calculating deepTools fingerprint plot.", - "fa_icon": "fas fa-dumpster", "hidden": true }, "publish_dir_mode": { @@ -472,21 +490,14 @@ "description": "Method used to save pipeline results to output directory.", "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", "fa_icon": "fas fa-copy", - "enum": [ - "symlink", - "rellink", - "link", - "copy", - "copyNoFollow", - "mov" - ], + "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], "hidden": true }, - "name": { - "type": "string", - "description": "Workflow name.", - "fa_icon": "fas fa-address-card", - "help_text": "A custom name for the pipeline run. Unlike the core nextflow `-name` option with one hyphen this parameter can be reused multiple times, for example if using `-resume`. 
Passed through to steps such as MultiQC and used for things like report filenames and titles.", + "fingerprint_bins": { + "type": "integer", + "default": 500000, + "description": "Number of genomic bins to use when calculating deepTools fingerprint plot.", + "fa_icon": "fas fa-dumpster", "hidden": true }, "email_on_fail": { @@ -501,31 +512,26 @@ "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "default": false, - "help_text": "", "hidden": true }, "max_multiqc_email_size": { "type": "string", "description": "File size limit when attaching MultiQC reports to summary emails.", + "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", "default": "25.MB", "fa_icon": "fas fa-file-upload", - "help_text": "", "hidden": true }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "default": false, - "help_text": "", "hidden": true }, "multiqc_config": { "type": "string", "description": "Custom config file to supply to MultiQC.", "fa_icon": "fas fa-cog", - "help_text": "", "hidden": true }, "tracedir": { @@ -533,18 +539,29 @@ "description": "Directory to keep pipeline Nextflow logs and reports.", "default": "${params.outdir}/pipeline_info", "fa_icon": "fas fa-cogs", - "help_text": "", "hidden": true }, - "clusterOptions": { - "type": "string", - "description": "Arguments passed to Nextflow clusterOptions.", - "fa_icon": "fas fa-network-wired", - "help_text": "", + "validate_params": { + "type": "boolean", + "description": "Boolean whether to validate parameters against the schema at runtime", + "default": true, + "fa_icon": "fas fa-check-square", "hidden": true + }, + "show_hidden_params": { + "type": "boolean", + "fa_icon": "far fa-eye-slash", + "description": "Show all params when using `--help`", + "hidden": true, + "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. 
Specifying this option will tell the pipeline to show all parameters." + }, + "enable_conda": { + "type": "boolean", + "description": "Run this workflow with Conda. You can also use '-profile conda' instead of providing this parameter.", + "hidden": true, + "fa_icon": "fas fa-bacon" } - }, - "fa_icon": "fas fa-file-import" + } } }, "allOf": [ @@ -563,9 +580,6 @@ { "$ref": "#/definitions/peak_calling_options" }, - { - "$ref": "#/definitions/differential_analysis_options" - }, { "$ref": "#/definitions/process_skipping_options" }, diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..0d62beb6f --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +# Config file for Python. Mostly used to configure linting of bin/check_samplesheet.py with Black. +# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation. +[tool.black] +line-length = 120 +target_version = ["py37", "py38", "py39", "py310"] + +[tool.isort] +profile = "black" +known_first_party = ["nf_core"] +multi_line_output = 3 diff --git a/subworkflows/local/filter_bam_bamtools.nf b/subworkflows/local/filter_bam_bamtools.nf new file mode 100644 index 000000000..40e9b1be6 --- /dev/null +++ b/subworkflows/local/filter_bam_bamtools.nf @@ -0,0 +1,36 @@ +/* + * Filter BAM file + */ + +include { BAM_FILTER } from '../../modules/local/bam_filter' +include { BAM_REMOVE_ORPHANS } from '../../modules/local/bam_remove_orphans' +include { BAM_SORT_SAMTOOLS } from '../nf-core/bam_sort_samtools' + +workflow FILTER_BAM_BAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + ch_bed // channel: [ bed ] + bamtools_filter_se_config // file: BAMtools filter JSON config file for SE data + bamtools_filter_pe_config // file: BAMtools filter JSON config file for PE data + + main: + ch_versions = Channel.empty() + + BAM_FILTER(ch_bam_bai, ch_bed, bamtools_filter_se_config, bamtools_filter_pe_config) + BAM_REMOVE_ORPHANS(BAM_FILTER.out.bam) + 
BAM_SORT_SAMTOOLS(BAM_REMOVE_ORPHANS.out.bam) + + ch_versions = ch_versions.mix(BAM_FILTER.out.versions, + BAM_REMOVE_ORPHANS.out.versions, + BAM_SORT_SAMTOOLS.out.versions) + + emit: + name_bam = BAM_REMOVE_ORPHANS.out.bam // channel: [ val(meta), [ bam ] ] + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/input_check.nf b/subworkflows/local/input_check.nf new file mode 100644 index 000000000..648a2971a --- /dev/null +++ b/subworkflows/local/input_check.nf @@ -0,0 +1,52 @@ +// +// Check input samplesheet and get read channels +// + +include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check' + +workflow INPUT_CHECK { + take: + samplesheet // file: /path/to/samplesheet.csv + seq_center // string: sequencing center for read group + + main: + SAMPLESHEET_CHECK ( samplesheet ) + .csv + .splitCsv ( header:true, sep:',' ) + .map { create_fastq_channel(it, seq_center) } + .set { reads } + + emit: + reads // channel: [ val(meta), [ reads ] ] + versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ] +} + +// Function to get list of [ meta, [ fastq_1, fastq_2 ] ] +def create_fastq_channel(LinkedHashMap row, String seq_center) { + def meta = [:] + meta.id = row.sample + meta.single_end = row.single_end.toBoolean() + meta.antibody = row.antibody + meta.control = row.control + + def read_group = "\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\'" + if (seq_center) { + read_group = 
"\'@RG\\tID:${meta.id}\\tSM:${meta.id.split('_')[0..-2].join('_')}\\tPL:ILLUMINA\\tLB:${meta.id}\\tPU:1\\tCN:${seq_center}\'" + } + meta.read_group = read_group + + // add path(s) of the fastq file(s) to the meta map + def fastq_meta = [] + if (!file(row.fastq_1).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${row.fastq_1}" + } + if (meta.single_end) { + fastq_meta = [ meta, [ file(row.fastq_1) ] ] + } else { + if (!file(row.fastq_2).exists()) { + exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${row.fastq_2}" + } + fastq_meta = [ meta, [ file(row.fastq_1), file(row.fastq_2) ] ] + } + return fastq_meta +} diff --git a/subworkflows/local/prepare_genome.nf b/subworkflows/local/prepare_genome.nf new file mode 100644 index 000000000..9b6422c6a --- /dev/null +++ b/subworkflows/local/prepare_genome.nf @@ -0,0 +1,217 @@ +// +// Uncompress and prepare reference genome files +// + +include { + GUNZIP as GUNZIP_FASTA + GUNZIP as GUNZIP_GTF + GUNZIP as GUNZIP_GFF + GUNZIP as GUNZIP_GENE_BED + GUNZIP as GUNZIP_BLACKLIST } from '../../modules/nf-core/modules/gunzip/main' + +include { + UNTAR as UNTAR_BWA_INDEX + UNTAR as UNTAR_BOWTIE2_INDEX + UNTAR as UNTAR_CHROMAP_INDEX + UNTAR as UNTAR_STAR_INDEX } from '../../modules/nf-core/modules/untar/main' + +include { GFFREAD } from '../../modules/nf-core/modules/gffread/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/modules/custom/getchromsizes/main' +include { BWA_INDEX } from '../../modules/nf-core/modules/bwa/index/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/modules/bowtie2/build/main' +include { CHROMAP_INDEX } from '../../modules/nf-core/modules/chromap/index/main' + +include { GTF2BED } from '../../modules/local/gtf2bed' +include { GENOME_BLACKLIST_REGIONS } from '../../modules/local/genome_blacklist_regions' +include { STAR_GENOMEGENERATE } from '../../modules/local/star_genomegenerate' + +workflow 
PREPARE_GENOME { + take: + prepare_tool_index // string : tool to prepare index for + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + ch_fasta = Channel.empty() + if (params.fasta.endsWith('.gz')) { + ch_fasta = GUNZIP_FASTA ( [ [:], params.fasta ] ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = file(params.fasta) + } + + // Make fasta file available if reference saved or IGV is run + if (params.save_reference || !params.skip_igv) { + file("${params.outdir}/genome/").mkdirs() + ch_fasta.copyTo("${params.outdir}/genome/") // NOTE(review): ch_fasta is a channel, not a file, when --fasta ends with .gz, so copyTo presumably fails in that case - confirm + } + + // + // Uncompress GTF annotation file or create from GFF3 if required + // + if (params.gtf) { + if (params.gtf.endsWith('.gz')) { + ch_gtf = GUNZIP_GTF ( [ [:], params.gtf ] ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GTF.out.versions) + } else { + ch_gtf = file(params.gtf) + } + } else if (params.gff) { + if (params.gff.endsWith('.gz')) { + ch_gff = GUNZIP_GFF ( [ [:], params.gff ] ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = file(params.gff) + } + ch_gtf = GFFREAD ( ch_gff ).gtf + ch_versions = ch_versions.mix(GFFREAD.out.versions) + } + + // + // Uncompress blacklist file if required + // + ch_blacklist = Channel.empty() + if (params.blacklist) { + if (params.blacklist.endsWith('.gz')) { + ch_blacklist = GUNZIP_BLACKLIST ( [ [:], params.blacklist ] ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_BLACKLIST.out.versions) + } else { + ch_blacklist = Channel.fromPath(file(params.blacklist)) + } + } + + // + // Uncompress gene BED annotation file or create from GTF if required + // + + // If --gtf is supplied along with --genome + // Make gene bed from supplied --gtf instead of using iGenomes one automatically + def make_bed = false + if (!params.gene_bed) { + make_bed = true + } else if (params.genome && params.gtf) { + if (params.genomes[
params.genome ].gtf != params.gtf) { + make_bed = true + } + } + + if (make_bed) { + ch_gene_bed = GTF2BED ( ch_gtf ).bed + ch_versions = ch_versions.mix(GTF2BED.out.versions) + } else { + if (params.gene_bed.endsWith('.gz')) { + ch_gene_bed = GUNZIP_GENE_BED ( [ [:], params.gene_bed ] ).gunzip.map{ it[1] } + ch_versions = ch_versions.mix(GUNZIP_GENE_BED.out.versions) + } else { + ch_gene_bed = file(params.gene_bed) + } + } + + // + // Create chromosome sizes file + // + ch_chrom_sizes = CUSTOM_GETCHROMSIZES ( ch_fasta ).sizes + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + + // + // Prepare genome intervals for filtering by removing regions in blacklist file + // + ch_genome_filtered_bed = Channel.empty() + + GENOME_BLACKLIST_REGIONS ( + CUSTOM_GETCHROMSIZES.out.sizes, + ch_blacklist.ifEmpty([]) + ) + ch_genome_filtered_bed = GENOME_BLACKLIST_REGIONS.out.bed + ch_versions = ch_versions.mix(GENOME_BLACKLIST_REGIONS.out.versions) + + + // + // Uncompress BWA index or generate from scratch if required + // + ch_bwa_index = Channel.empty() + if (prepare_tool_index == 'bwa') { + if (params.bwa_index) { + if (params.bwa_index.endsWith('.tar.gz')) { + ch_bwa_index = UNTAR_BWA_INDEX ( [ [:], params.bwa_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_BWA_INDEX.out.versions) + } else { + ch_bwa_index = file(params.bwa_index) + } + } else { + ch_bwa_index = BWA_INDEX ( ch_fasta ).index + ch_versions = ch_versions.mix(BWA_INDEX.out.versions) + } + } + + // + // Uncompress Bowtie2 index or generate from scratch if required + // + ch_bowtie2_index = Channel.empty() + if (prepare_tool_index == 'bowtie2') { + if (params.bowtie2_index) { + if (params.bowtie2_index.endsWith('.tar.gz')) { + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX ( [ [:], params.bowtie2_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) + } else { + ch_bowtie2_index = file(params.bowtie2_index) + } + } else { + ch_bowtie2_index = 
BOWTIE2_BUILD ( ch_fasta ).index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + } + } + + // + // Uncompress CHROMAP index or generate from scratch if required + // + ch_chromap_index = Channel.empty() + if (prepare_tool_index == 'chromap') { + if (params.chromap_index) { + if (params.chromap_index.endsWith('.tar.gz')) { + ch_chromap_index = UNTAR_CHROMAP_INDEX ( [ [:], params.chromap_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_CHROMAP_INDEX.out.versions) // must use the alias: UNTAR is only included under aliased names in this file + } else { + ch_chromap_index = file(params.chromap_index) + } + } else { + ch_chromap_index = CHROMAP_INDEX ( ch_fasta ).index + ch_versions = ch_versions.mix(CHROMAP_INDEX.out.versions) + } + } + + // + // Uncompress STAR index or generate from scratch if required + // + ch_star_index = Channel.empty() + if (prepare_tool_index == 'star') { + if (params.star_index) { + if (params.star_index.endsWith('.tar.gz')) { + ch_star_index = UNTAR_STAR_INDEX ( [ [:], params.star_index ] ).untar.map{ it[1] } + ch_versions = ch_versions.mix(UNTAR_STAR_INDEX.out.versions) + } else { + ch_star_index = file(params.star_index) + } + } else { + ch_star_index = STAR_GENOMEGENERATE ( ch_fasta, ch_gtf ).index + ch_versions = ch_versions.mix(STAR_GENOMEGENERATE.out.versions) + } + } + + emit: + fasta = ch_fasta // path: genome.fasta + gtf = ch_gtf // path: genome.gtf + gene_bed = ch_gene_bed // path: gene.bed + chrom_sizes = ch_chrom_sizes // path: genome.sizes + filtered_bed = ch_genome_filtered_bed // path: *.include_regions.bed + bwa_index = ch_bwa_index // path: bwa/index/ + bowtie2_index = ch_bowtie2_index // path: bowtie2/index/ + chromap_index = ch_chromap_index // path: genome.index + star_index = ch_star_index // path: star/index/ + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/align_bowtie2.nf b/subworkflows/nf-core/align_bowtie2.nf new file mode 100644 index 000000000..352191313 --- /dev/null +++ b/subworkflows/nf-core/align_bowtie2.nf @@
-0,0 +1,38 @@ +/* + * Map reads with Bowtie2, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { BOWTIE2_ALIGN } from '../../modules/nf-core/modules/bowtie2/align/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_BOWTIE2 { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + save_unaligned // boolean: true/false + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN(reads, index, save_unaligned, false) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(BOWTIE2_ALIGN.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_bwa_mem.nf b/subworkflows/nf-core/align_bwa_mem.nf new file mode 100644 index 000000000..0c5dff084 --- /dev/null +++ b/subworkflows/nf-core/align_bwa_mem.nf @@ -0,0 +1,37 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { BWA_MEM } from '../../modules/nf-core/modules/bwa/mem/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_BWA_MEM { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + + main: + + ch_versions = Channel.empty() + + // + // Map reads with BWA + // + BWA_MEM(reads, index, false) + ch_versions = ch_versions.mix(BWA_MEM.out.versions.first()) + + // + // Sort, index BAM file
and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(BWA_MEM.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_chromap.nf b/subworkflows/nf-core/align_chromap.nf new file mode 100644 index 000000000..7eb739779 --- /dev/null +++ b/subworkflows/nf-core/align_chromap.nf @@ -0,0 +1,38 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { CHROMAP_CHROMAP } from '../../modules/nf-core/modules/chromap/chromap/main' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_CHROMAP { + take: + reads // channel: [ val(meta), [ reads ] ] + index // path: /path/to/index + fasta // path: /path/to/fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with CHROMAP + // + CHROMAP_CHROMAP(reads, fasta, index, [], [], [], []) + ch_versions = ch_versions.mix(CHROMAP_CHROMAP.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS(CHROMAP_CHROMAP.out.bam) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions.first()) + + emit: + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] 
] + + versions = ch_versions // path: versions.yml +} diff --git a/subworkflows/nf-core/align_star.nf b/subworkflows/nf-core/align_star.nf new file mode 100644 index 000000000..474621824 --- /dev/null +++ b/subworkflows/nf-core/align_star.nf @@ -0,0 +1,46 @@ +/* + * Map reads, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { STAR_ALIGN } from '../../modules/local/star_align' +include { BAM_SORT_SAMTOOLS } from './bam_sort_samtools' + +workflow ALIGN_STAR { + take: + reads // channel: [ val(meta), [ reads ] ] + index // channel: /path/to/star/index/ + + main: + + ch_versions = Channel.empty() + + // + // Map reads with STAR + // + STAR_ALIGN ( reads, index ) + ch_versions = ch_versions.mix(STAR_ALIGN.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_SAMTOOLS ( STAR_ALIGN.out.bam ) + ch_versions = ch_versions.mix(BAM_SORT_SAMTOOLS.out.versions) + + emit: + orig_bam = STAR_ALIGN.out.bam // channel: [ val(meta), bam ] + log_final = STAR_ALIGN.out.log_final // channel: [ val(meta), log_final ] + log_out = STAR_ALIGN.out.log_out // channel: [ val(meta), log_out ] + log_progress = STAR_ALIGN.out.log_progress // channel: [ val(meta), log_progress ] + bam_sorted = STAR_ALIGN.out.bam_sorted // channel: [ val(meta), bam_sorted ] + bam_transcript = STAR_ALIGN.out.bam_transcript // channel: [ val(meta), bam_transcript ] + fastq = STAR_ALIGN.out.fastq // channel: [ val(meta), fastq ] + tab = STAR_ALIGN.out.tab // channel: [ val(meta), tab ] + + bam = BAM_SORT_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_SORT_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} 
diff --git a/subworkflows/nf-core/bam_sort_samtools.nf b/subworkflows/nf-core/bam_sort_samtools.nf new file mode 100644 index 000000000..418e14cc0 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_samtools.nf @@ -0,0 +1,34 @@ +/* + * Sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { SAMTOOLS_SORT } from '../../modules/nf-core/modules/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' + +workflow BAM_SORT_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT(ch_bam) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX(SAMTOOLS_SORT.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_STATS_SAMTOOLS(SAMTOOLS_SORT.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools.nf b/subworkflows/nf-core/bam_stats_samtools.nf new file mode 100644 index 000000000..89a7338f6 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools.nf @@ -0,0 +1,32 @@ +/* + * Run SAMtools stats, flagstat and idxstats + */ + +include { SAMTOOLS_STATS } from '../../modules/nf-core/modules/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/modules/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from 
'../../modules/nf-core/modules/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), [ bam ], [bai] ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, [] ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions.first()) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions.first()) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions.first()) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastqc_trimgalore.nf b/subworkflows/nf-core/fastqc_trimgalore.nf new file mode 100644 index 000000000..966541cad --- /dev/null +++ b/subworkflows/nf-core/fastqc_trimgalore.nf @@ -0,0 +1,48 @@ +// +// Read QC and trimming +// + +include { FASTQC } from '../../modules/nf-core/modules/fastqc/main' +include { TRIMGALORE } from '../../modules/nf-core/modules/trimgalore/main' + +workflow FASTQC_TRIMGALORE { + take: + reads // channel: [ val(meta), [ reads ] ] + skip_fastqc // boolean: true/false + skip_trimming // boolean: true/false + + main: + + ch_versions = Channel.empty() + fastqc_html = Channel.empty() + fastqc_zip = Channel.empty() + if (!skip_fastqc) { + FASTQC ( reads ).html.set { fastqc_html } + fastqc_zip = FASTQC.out.zip + ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + } + + trim_reads = reads + trim_html = Channel.empty() + trim_zip = Channel.empty() + trim_log = Channel.empty() + if (!skip_trimming) { + TRIMGALORE ( reads ).reads.set { trim_reads } + trim_html = TRIMGALORE.out.html + trim_zip = TRIMGALORE.out.zip + trim_log = TRIMGALORE.out.log + ch_versions = 
ch_versions.mix(TRIMGALORE.out.versions.first()) + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + + fastqc_html // channel: [ val(meta), [ html ] ] + fastqc_zip // channel: [ val(meta), [ zip ] ] + + trim_html // channel: [ val(meta), [ html ] ] + trim_zip // channel: [ val(meta), [ zip ] ] + trim_log // channel: [ val(meta), [ txt ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/mark_duplicates_picard.nf b/subworkflows/nf-core/mark_duplicates_picard.nf new file mode 100644 index 000000000..33e88bf58 --- /dev/null +++ b/subworkflows/nf-core/mark_duplicates_picard.nf @@ -0,0 +1,42 @@ +/* + * Picard MarkDuplicates, sort, index BAM file and run samtools stats, flagstat and idxstats + */ + +include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/modules/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/modules/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from './bam_stats_samtools' + +workflow MARK_DUPLICATES_PICARD { + take: + bam // channel: [ val(meta), [ bam ] ] + + main: + + ch_versions = Channel.empty() + + // + // Picard MarkDuplicates + // + PICARD_MARKDUPLICATES(bam) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX(PICARD_MARKDUPLICATES.out.bam) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_STATS_SAMTOOLS(PICARD_MARKDUPLICATES.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0])) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), [ bam ] ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), [ metrics ] ] + + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = 
BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/tower.yml b/tower.yml new file mode 100644 index 000000000..5b1f5f906 --- /dev/null +++ b/tower.yml @@ -0,0 +1,19 @@ +reports: + multiqc_report.html: + display: "MultiQC HTML report" + macs2_peak.plots.pdf: + display: "All samples MACS2 peak QC PDF plots" + macs2_annotatePeaks.plots.pdf: + display: "All samples HOMER annotatePeaks.pl QC PDF plots" + "*.consensus_peaks.plots.pdf": + display: "Consensus peaks DESeq2 QC PDF plots" + "*.consensus_peaks.boolean.intersect.plot.pdf": + display: "Consensus peaks UpSetR intersection PDF plots" + "*.consensus_peaks.boolean.annotatePeaks.txt": + display: "Consensus peaks annotated by HOMER" + "*.plotHeatmap.pdf": + display: "Per-sample deepTools plotHeatmap PDF plots" + "*_peaks.broadPeak": + display: "Per-sample MACS2 broadPeak file" + "*_peaks.narrowPeak": + display: "Per-sample MACS2 narrowPeak file" diff --git a/workflows/chipseq.nf b/workflows/chipseq.nf new file mode 100644 index 000000000..ffe995186 --- /dev/null +++ b/workflows/chipseq.nf @@ -0,0 +1,746 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def valid_params = [ + aligners : [ 'bwa', 'bowtie2', 'chromap', 'star' ] +] + +def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params) + +// Validate input parameters +WorkflowChipseq.initialise(params, log, valid_params) + +// Check input path parameters to see if they exist +def checkPathParamList = [ + params.input, params.multiqc_config, + params.fasta, + params.gtf, params.gff, params.gene_bed, + params.bwa_index, params.bowtie2_index, params.chromap_index, params.star_index, + params.blacklist, + 
params.bamtools_filter_pe_config, params.bamtools_filter_se_config +] +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' } + +// Save AWS IGenomes file containing annotation version +def anno_readme = params.genomes[ params.genome ]?.readme +if (anno_readme && file(anno_readme).exists()) { + file("${params.outdir}/genome/").mkdirs() + file(anno_readme).copyTo("${params.outdir}/genome/") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath(params.multiqc_config) : Channel.empty() + +// JSON files required by BAMTools for alignment filtering +ch_bamtools_filter_se_config = file(params.bamtools_filter_se_config, checkIfExists: true) +ch_bamtools_filter_pe_config = file(params.bamtools_filter_pe_config, checkIfExists: true) + +// Header files for MultiQC +ch_spp_nsc_header = file("$projectDir/assets/multiqc/spp_nsc_header.txt", checkIfExists: true) +ch_spp_rsc_header = file("$projectDir/assets/multiqc/spp_rsc_header.txt", checkIfExists: true) +ch_spp_correlation_header = file("$projectDir/assets/multiqc/spp_correlation_header.txt", checkIfExists: true) +ch_peak_count_header = file("$projectDir/assets/multiqc/peak_count_header.txt", checkIfExists: true) +ch_frip_score_header = file("$projectDir/assets/multiqc/frip_score_header.txt", checkIfExists: true) +ch_peak_annotation_header = file("$projectDir/assets/multiqc/peak_annotation_header.txt", checkIfExists: true) +ch_deseq2_pca_header = file("$projectDir/assets/multiqc/deseq2_pca_header.txt", checkIfExists: true) +ch_deseq2_clustering_header = 
file("$projectDir/assets/multiqc/deseq2_clustering_header.txt", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { BEDTOOLS_GENOMECOV } from '../modules/local/bedtools_genomecov' +include { FRIP_SCORE } from '../modules/local/frip_score' +include { PLOT_MACS2_QC } from '../modules/local/plot_macs2_qc' +include { PLOT_HOMER_ANNOTATEPEAKS } from '../modules/local/plot_homer_annotatepeaks' +include { MACS2_CONSENSUS } from '../modules/local/macs2_consensus' +include { ANNOTATE_BOOLEAN_PEAKS } from '../modules/local/annotate_boolean_peaks' +include { DESEQ2_QC } from '../modules/local/deseq2_qc' +include { IGV } from '../modules/local/igv' +include { MULTIQC } from '../modules/local/multiqc' +include { MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS } from '../modules/local/multiqc_custom_phantompeakqualtools' +include { MULTIQC_CUSTOM_PEAKS } from '../modules/local/multiqc_custom_peaks' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { INPUT_CHECK } from '../subworkflows/local/input_check' +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome' +include { FILTER_BAM_BAMTOOLS } from '../subworkflows/local/filter_bam_bamtools' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// + +include { PICARD_MERGESAMFILES } from '../modules/nf-core/modules/picard/mergesamfiles/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/modules/picard/collectmultiplemetrics/main' +include { PRESEQ_LCEXTRAP } from '../modules/nf-core/modules/preseq/lcextrap/main' +include { 
PHANTOMPEAKQUALTOOLS } from '../modules/nf-core/modules/phantompeakqualtools/main' +include { UCSC_BEDGRAPHTOBIGWIG } from '../modules/nf-core/modules/ucsc/bedgraphtobigwig/main' +include { DEEPTOOLS_COMPUTEMATRIX } from '../modules/nf-core/modules/deeptools/computematrix/main' +include { DEEPTOOLS_PLOTPROFILE } from '../modules/nf-core/modules/deeptools/plotprofile/main' +include { DEEPTOOLS_PLOTHEATMAP } from '../modules/nf-core/modules/deeptools/plotheatmap/main' +include { DEEPTOOLS_PLOTFINGERPRINT } from '../modules/nf-core/modules/deeptools/plotfingerprint/main' +include { KHMER_UNIQUEKMERS } from '../modules/nf-core/modules/khmer/uniquekmers/main' +include { MACS2_CALLPEAK } from '../modules/nf-core/modules/macs2/callpeak/main' +include { SUBREAD_FEATURECOUNTS } from '../modules/nf-core/modules/subread/featurecounts/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/modules/custom/dumpsoftwareversions/main' + +include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_MACS2 } from '../modules/nf-core/modules/homer/annotatepeaks/main' +include { HOMER_ANNOTATEPEAKS as HOMER_ANNOTATEPEAKS_CONSENSUS } from '../modules/nf-core/modules/homer/annotatepeaks/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// + +include { FASTQC_TRIMGALORE } from '../subworkflows/nf-core/fastqc_trimgalore' +include { ALIGN_BWA_MEM } from '../subworkflows/nf-core/align_bwa_mem' +include { ALIGN_BOWTIE2 } from '../subworkflows/nf-core/align_bowtie2' +include { ALIGN_CHROMAP } from '../subworkflows/nf-core/align_chromap' +include { ALIGN_STAR } from '../subworkflows/nf-core/align_star' +include { MARK_DUPLICATES_PICARD } from '../subworkflows/nf-core/mark_duplicates_picard' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report 
= []
+
+//
+// WORKFLOW: Main ChIP-seq analysis
+//
+// Stages, in order: reference preparation, samplesheet validation, read QC and
+// trimming, alignment with the aligner named by `params.aligner`, merging of
+// BAMs, duplicate marking and filtering, alignment-level QC, coverage tracks,
+// MACS2 peak calling with FRiP/annotation QC, per-antibody consensus peaks with
+// read counting and DESeq2 QC, an IGV session, software-version collation and
+// MultiQC. Most QC stages can be switched off through `params.skip_*` flags.
+//
+// Side effect: assigns the script-level `multiqc_report` variable that the
+// `workflow.onComplete` handler passes to the completion e-mail.
+//
+workflow CHIPSEQ {
+
+    // Collects the `versions` output of every tool that is run below
+    ch_versions = Channel.empty()
+
+    //
+    // SUBWORKFLOW: Uncompress and prepare reference genome files
+    //
+    PREPARE_GENOME (
+        params.aligner
+    )
+    ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions)
+
+    //
+    // SUBWORKFLOW: Read in samplesheet, validate and stage input files
+    //
+    INPUT_CHECK (
+        file(params.input),
+        params.seq_center
+    )
+    ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
+
+    //
+    // SUBWORKFLOW: Read QC and trim adapters
+    //
+    FASTQC_TRIMGALORE (
+        INPUT_CHECK.out.reads,
+        params.skip_fastqc || params.skip_qc,
+        params.skip_trimming
+    )
+    ch_versions = ch_versions.mix(FASTQC_TRIMGALORE.out.versions)
+
+    //
+    // SUBWORKFLOW: Alignment with BWA & BAM QC
+    //
+    // These channels start empty and are overwritten by whichever aligner
+    // branch matches `params.aligner`.
+    ch_genome_bam        = Channel.empty()
+    ch_genome_bam_index  = Channel.empty()
+    ch_samtools_stats    = Channel.empty()
+    ch_samtools_flagstat = Channel.empty()
+    ch_samtools_idxstats = Channel.empty()
+    if (params.aligner == 'bwa') {
+        ALIGN_BWA_MEM (
+            FASTQC_TRIMGALORE.out.reads,
+            PREPARE_GENOME.out.bwa_index
+        )
+        ch_genome_bam        = ALIGN_BWA_MEM.out.bam
+        ch_genome_bam_index  = ALIGN_BWA_MEM.out.bai
+        ch_samtools_stats    = ALIGN_BWA_MEM.out.stats
+        ch_samtools_flagstat = ALIGN_BWA_MEM.out.flagstat
+        ch_samtools_idxstats = ALIGN_BWA_MEM.out.idxstats
+        ch_versions = ch_versions.mix(ALIGN_BWA_MEM.out.versions.first())
+    }
+
+    //
+    // SUBWORKFLOW: Alignment with Bowtie2 & BAM QC
+    //
+    if (params.aligner == 'bowtie2') {
+        ALIGN_BOWTIE2 (
+            FASTQC_TRIMGALORE.out.reads,
+            PREPARE_GENOME.out.bowtie2_index,
+            params.save_unaligned
+        )
+        ch_genome_bam        = ALIGN_BOWTIE2.out.bam
+        ch_genome_bam_index  = ALIGN_BOWTIE2.out.bai
+        ch_samtools_stats    = ALIGN_BOWTIE2.out.stats
+        ch_samtools_flagstat = ALIGN_BOWTIE2.out.flagstat
+        ch_samtools_idxstats = ALIGN_BOWTIE2.out.idxstats
+        ch_versions = ch_versions.mix(ALIGN_BOWTIE2.out.versions.first())
+    }
+
+    //
+    // SUBWORKFLOW: Alignment with Chromap & BAM QC
+    //
+    if (params.aligner == 'chromap') {
+        ALIGN_CHROMAP (
+            FASTQC_TRIMGALORE.out.reads,
+            PREPARE_GENOME.out.chromap_index,
+            PREPARE_GENOME.out.fasta
+        )
+
+        // Filter out paired-end reads until the issue below is fixed
+        // https://github.com/nf-core/chipseq/issues/291
+        // ch_genome_bam = ALIGN_CHROMAP.out.bam
+        ALIGN_CHROMAP
+            .out
+            .bam
+            .branch {
+                meta, bam ->
+                    single_end: meta.single_end
+                        return [ meta, bam ]
+                    paired_end: !meta.single_end
+                        return [ meta, bam ]
+            }
+            .set { ch_genome_bam_chromap }
+
+        // Warn once, after all paired-end BAMs have been gathered, that they
+        // are being dropped; this chain is used only for its logging side effect.
+        ch_genome_bam_chromap
+            .paired_end
+            .collect()
+            .map {
+                it ->
+                    def count = it.size()
+                    if (count > 0) {
+                        log.warn "=============================================================================\n" +
+                        " Paired-end files produced by chromap cannot be used by some downstream tools due to the issue below:\n" +
+                        " https://github.com/nf-core/chipseq/issues/291\n" +
+                        " They will be excluded from the analysis. Consider using a different aligner\n" +
+                        "==================================================================================="
+                    }
+            }
+
+        // Only the single-end branch continues downstream
+        ch_genome_bam        = ch_genome_bam_chromap.single_end
+        ch_genome_bam_index  = ALIGN_CHROMAP.out.bai
+        ch_samtools_stats    = ALIGN_CHROMAP.out.stats
+        ch_samtools_flagstat = ALIGN_CHROMAP.out.flagstat
+        ch_samtools_idxstats = ALIGN_CHROMAP.out.idxstats
+        ch_versions = ch_versions.mix(ALIGN_CHROMAP.out.versions.first())
+    }
+
+    //
+    // SUBWORKFLOW: Alignment with STAR & BAM QC
+    //
+    if (params.aligner == 'star') {
+        ALIGN_STAR (
+            FASTQC_TRIMGALORE.out.reads,
+            PREPARE_GENOME.out.star_index
+        )
+        ch_genome_bam        = ALIGN_STAR.out.bam
+        ch_genome_bam_index  = ALIGN_STAR.out.bai
+        ch_transcriptome_bam = ALIGN_STAR.out.bam_transcript
+        ch_samtools_stats    = ALIGN_STAR.out.stats
+        ch_samtools_flagstat = ALIGN_STAR.out.flagstat
+        ch_samtools_idxstats = ALIGN_STAR.out.idxstats
+        ch_star_multiqc      = ALIGN_STAR.out.log_final
+
+        ch_versions = ch_versions.mix(ALIGN_STAR.out.versions)
+    }
+
+    //
+    // MODULE: Merge resequenced BAM files
+    //
+    // The grouping key is a copy of meta without `read_group` and with the last
+    // '_'-separated token of the id removed, so BAMs whose ids differ only in
+    // that token are merged together.
+    // NOTE(review): the dropped token is presumably a per-run suffix added by
+    // INPUT_CHECK - confirm against that subworkflow.
+    ch_genome_bam
+        .map {
+            meta, bam ->
+                def meta_clone = meta.clone()
+                meta_clone.remove('read_group')
+                meta_clone.id = meta_clone.id.split('_')[0..-2].join('_')
+                [ meta_clone, bam ]
+        }
+        .groupTuple(by: [0])
+        .map {
+            it ->
+                [ it[0], it[1].flatten() ]
+        }
+        .set { ch_sort_bam }
+
+    PICARD_MERGESAMFILES (
+        ch_sort_bam
+    )
+    ch_versions = ch_versions.mix(PICARD_MERGESAMFILES.out.versions.first().ifEmpty(null))
+
+    //
+    // SUBWORKFLOW: Mark duplicates & filter BAM files after merging
+    //
+    MARK_DUPLICATES_PICARD (
+        PICARD_MERGESAMFILES.out.bam
+    )
+    ch_versions = ch_versions.mix(MARK_DUPLICATES_PICARD.out.versions)
+
+    //
+    // SUBWORKFLOW: Filter BAM file with BamTools
+    //
+    FILTER_BAM_BAMTOOLS (
+        MARK_DUPLICATES_PICARD.out.bam.join(MARK_DUPLICATES_PICARD.out.bai, by: [0]),
+        PREPARE_GENOME.out.filtered_bed.first(),
+        ch_bamtools_filter_se_config,
+        ch_bamtools_filter_pe_config
+    )
+    ch_versions = ch_versions.mix(FILTER_BAM_BAMTOOLS.out.versions.first().ifEmpty(null))
+
+    //
+    // MODULE: Preseq coverage analysis
+    //
+    ch_preseq_multiqc = Channel.empty()
+    if (!params.skip_preseq) {
+        PRESEQ_LCEXTRAP (
+            MARK_DUPLICATES_PICARD.out.bam
+        )
+        ch_preseq_multiqc = PRESEQ_LCEXTRAP.out.lc_extrap
+        ch_versions = ch_versions.mix(PRESEQ_LCEXTRAP.out.versions.first())
+    }
+
+    //
+    // MODULE: Picard post alignment QC
+    //
+    ch_picardcollectmultiplemetrics_multiqc = Channel.empty()
+    if (!params.skip_picard_metrics) {
+        PICARD_COLLECTMULTIPLEMETRICS (
+            FILTER_BAM_BAMTOOLS.out.bam,
+            PREPARE_GENOME.out.fasta,
+            []
+        )
+        ch_picardcollectmultiplemetrics_multiqc = PICARD_COLLECTMULTIPLEMETRICS.out.metrics
+        ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first())
+    }
+
+    //
+    // MODULE: Phantompeaktools strand cross-correlation and QC metrics
+    //
+    PHANTOMPEAKQUALTOOLS (
+        FILTER_BAM_BAMTOOLS.out.bam
+    )
+    ch_versions = ch_versions.mix(PHANTOMPEAKQUALTOOLS.out.versions.first())
+
+    //
+    // MODULE: MultiQC custom content for Phantompeaktools
+    //
+    MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS (
+        PHANTOMPEAKQUALTOOLS.out.spp.join(PHANTOMPEAKQUALTOOLS.out.rdata, by: [0]),
+        ch_spp_nsc_header,
+        ch_spp_rsc_header,
+        ch_spp_correlation_header
+    )
+
+    //
+    // MODULE: BedGraph coverage tracks
+    //
+    BEDTOOLS_GENOMECOV (
+        FILTER_BAM_BAMTOOLS.out.bam.join(FILTER_BAM_BAMTOOLS.out.flagstat, by: [0])
+    )
+    ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first())
+
+    //
+    // MODULE: BigWig coverage tracks
+    //
+    UCSC_BEDGRAPHTOBIGWIG (
+        BEDTOOLS_GENOMECOV.out.bedgraph,
+        PREPARE_GENOME.out.chrom_sizes
+    )
+    ch_versions = ch_versions.mix(UCSC_BEDGRAPHTOBIGWIG.out.versions.first())
+
+    ch_deeptoolsplotprofile_multiqc = Channel.empty()
+    if (!params.skip_plot_profile) {
+        //
+        // MODULE: deepTools matrix generation for plotting
+        //
+        DEEPTOOLS_COMPUTEMATRIX (
+            UCSC_BEDGRAPHTOBIGWIG.out.bigwig,
+            PREPARE_GENOME.out.gene_bed
+        )
+        ch_versions = ch_versions.mix(DEEPTOOLS_COMPUTEMATRIX.out.versions.first())
+
+        //
+        // MODULE: deepTools profile plots
+        //
+        DEEPTOOLS_PLOTPROFILE (
+            DEEPTOOLS_COMPUTEMATRIX.out.matrix
+        )
+        ch_deeptoolsplotprofile_multiqc = DEEPTOOLS_PLOTPROFILE.out.table
+        ch_versions = ch_versions.mix(DEEPTOOLS_PLOTPROFILE.out.versions.first())
+
+        //
+        // MODULE: deepTools heatmaps
+        //
+        DEEPTOOLS_PLOTHEATMAP (
+            DEEPTOOLS_COMPUTEMATRIX.out.matrix
+        )
+        ch_versions = ch_versions.mix(DEEPTOOLS_PLOTHEATMAP.out.versions.first())
+    }
+
+    //
+    // Create channels: [ meta, [ ip_bam, control_bam ] [ ip_bai, control_bai ] ]
+    //
+    FILTER_BAM_BAMTOOLS
+        .out
+        .bam
+        .join(FILTER_BAM_BAMTOOLS.out.bai, by: [0])
+        .set { ch_genome_bam_bai }
+
+    // Pair every sample with every sample and keep only the pairs in which the
+    // first sample's `control` field names the second sample's id. Non-matching
+    // pairs map to null, which is relied upon to be dropped from the channel.
+    ch_genome_bam_bai
+        .combine(ch_genome_bam_bai)
+        .map {
+            meta1, bam1, bai1, meta2, bam2, bai2 ->
+                meta1.control == meta2.id ? [ meta1, [ bam1, bam2 ], [ bai1, bai2 ] ] : null
+        }
+        .set { ch_ip_control_bam_bai }
+
+    //
+    // MODULE: deepTools plotFingerprint joint QC for IP and control
+    //
+    ch_deeptoolsplotfingerprint_multiqc = Channel.empty()
+    if (!params.skip_plot_fingerprint) {
+        DEEPTOOLS_PLOTFINGERPRINT (
+            ch_ip_control_bam_bai
+        )
+        ch_deeptoolsplotfingerprint_multiqc = DEEPTOOLS_PLOTFINGERPRINT.out.matrix
+        ch_versions = ch_versions.mix(DEEPTOOLS_PLOTFINGERPRINT.out.versions.first())
+    }
+
+    //
+    // MODULE: Calculate genome size with khmer
+    //
+    ch_custompeaks_frip_multiqc       = Channel.empty()
+    ch_custompeaks_count_multiqc      = Channel.empty()
+    ch_plothomerannotatepeaks_multiqc = Channel.empty()
+    ch_subreadfeaturecounts_multiqc   = Channel.empty()
+    // Use the user-supplied genome size when given; otherwise take the trimmed
+    // text content of the file emitted by KHMER_UNIQUEKMERS.
+    ch_macs_gsize = params.macs_gsize
+    if (!params.macs_gsize) {
+        KHMER_UNIQUEKMERS (
+            PREPARE_GENOME.out.fasta,
+            params.read_length
+        )
+        ch_macs_gsize = KHMER_UNIQUEKMERS.out.kmers.map { it.text.trim() }
+    }
+
+    // Create channels: [ meta, ip_bam, control_bam ]
+    ch_ip_control_bam_bai
+        .map {
+            meta, bams, bais ->
+                [ meta , bams[0], bams[1] ]
+        }
+        .set { ch_ip_control_bam }
+
+    //
+    // MODULE: Call peaks with MACS2
+    //
+    MACS2_CALLPEAK (
+        ch_ip_control_bam,
+        ch_macs_gsize
+    )
+    ch_versions = ch_versions.mix(MACS2_CALLPEAK.out.versions.first())
+
+    //
+    // Filter out samples with 0 MACS2 peaks called
+    //
+    MACS2_CALLPEAK
+        .out
+        .peak
+        .filter { meta, peaks -> peaks.size() > 0 }
+        .set { ch_macs2_peaks }
+
+    // Create channels: [ meta, ip_bam, peaks ]
+    // After the join each item is [ meta, ip_bam, control_bam, peaks ], hence index 3.
+    ch_ip_control_bam
+        .join(ch_macs2_peaks, by: [0])
+        .map {
+            it ->
+                [ it[0], it[1], it[3] ]
+        }
+        .set { ch_ip_bam_peaks }
+
+    //
+    // MODULE: Calculate FRiP score
+    //
+    FRIP_SCORE (
+        ch_ip_bam_peaks
+    )
+    ch_versions = ch_versions.mix(FRIP_SCORE.out.versions.first())
+
+    // Create channels: [ meta, peaks, frip ]
+    ch_ip_bam_peaks
+        .join(FRIP_SCORE.out.txt, by: [0])
+        .map {
+            it ->
+                [ it[0], it[2], it[3] ]
+        }
+        .set { ch_ip_peaks_frip }
+
+    //
+    // MODULE: FRiP score custom content for MultiQC
+    //
+    MULTIQC_CUSTOM_PEAKS (
+        ch_ip_peaks_frip,
+        ch_peak_count_header,
+        ch_frip_score_header
+    )
+    ch_custompeaks_frip_multiqc  = MULTIQC_CUSTOM_PEAKS.out.frip
+    ch_custompeaks_count_multiqc = MULTIQC_CUSTOM_PEAKS.out.count
+
+    if (!params.skip_peak_annotation) {
+        //
+        // MODULE: Annotate peaks with MACS2
+        //
+        HOMER_ANNOTATEPEAKS_MACS2 (
+            ch_macs2_peaks,
+            PREPARE_GENOME.out.fasta,
+            PREPARE_GENOME.out.gtf
+        )
+        ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_MACS2.out.versions.first())
+
+        if (!params.skip_peak_qc) {
+            //
+            // MODULE: MACS2 QC plots with R
+            //
+            PLOT_MACS2_QC (
+                ch_macs2_peaks.collect{it[1]}
+            )
+            ch_versions = ch_versions.mix(PLOT_MACS2_QC.out.versions)
+
+            //
+            // MODULE: Peak annotation QC plots with R
+            //
+            PLOT_HOMER_ANNOTATEPEAKS (
+                HOMER_ANNOTATEPEAKS_MACS2.out.txt.collect{it[1]},
+                ch_peak_annotation_header,
+                "_peaks.annotatePeaks.txt"
+            )
+            ch_plothomerannotatepeaks_multiqc = PLOT_HOMER_ANNOTATEPEAKS.out.tsv
+            ch_versions = ch_versions.mix(PLOT_HOMER_ANNOTATEPEAKS.out.versions)
+        }
+    }
+
+    //
+    // Consensus peaks analysis
+    //
+    ch_macs2_consensus_bed_lib   = Channel.empty()
+    ch_macs2_consensus_txt_lib   = Channel.empty()
+    ch_deseq2_pca_multiqc        = Channel.empty()
+    ch_deseq2_clustering_multiqc = Channel.empty()
+    if (!params.skip_consensus_peaks) {
+        // Create channels: [ meta , [ peaks ] ]
+        // Where meta = [ id:antibody, multiple_groups:true/false, replicates_exist:true/false ]
+        ch_macs2_peaks
+            .map {
+                meta, peak ->
+                    // Group name = sample id without its last '_'-separated token
+                    [ meta.antibody, meta.id.split('_')[0..-2].join('_'), peak ]
+            }
+            .groupTuple()
+            .map {
+                antibody, groups, peaks ->
+                    [
+                        antibody,
+                        // Map of group name -> number of samples in that group
+                        groups.groupBy().collectEntries { [(it.key) : it.value.size()] },
+                        peaks
+                    ]
+            }
+            .map {
+                antibody, groups, peaks ->
+                    def meta_new = [:]
+                    meta_new.id = antibody
+                    meta_new.multiple_groups = groups.size() > 1
+                    // Rank entries by their own count (`it.value`). The previous
+                    // `groups.value` was a lookup of the key 'value' on the map,
+                    // which is null for every entry, so max() could return a
+                    // group with fewer samples and under-report replicates.
+                    meta_new.replicates_exist = groups.max { it.value }.value > 1
+                    [ meta_new, peaks ]
+            }
+            .set { ch_antibody_peaks }
+
+        //
+        // MODULE: Generate consensus peaks across samples
+        //
+        MACS2_CONSENSUS (
+            ch_antibody_peaks
+        )
+        ch_macs2_consensus_bed_lib = MACS2_CONSENSUS.out.bed
+        ch_macs2_consensus_txt_lib = MACS2_CONSENSUS.out.txt
+        ch_versions = ch_versions.mix(MACS2_CONSENSUS.out.versions)
+
+        if (!params.skip_peak_annotation) {
+            //
+            // MODULE: Annotate consensus peaks
+            //
+            HOMER_ANNOTATEPEAKS_CONSENSUS (
+                MACS2_CONSENSUS.out.bed,
+                PREPARE_GENOME.out.fasta,
+                PREPARE_GENOME.out.gtf
+            )
+            ch_versions = ch_versions.mix(HOMER_ANNOTATEPEAKS_CONSENSUS.out.versions)
+
+            //
+            // MODULE: Add boolean fields to annotated consensus peaks to aid filtering
+            //
+            ANNOTATE_BOOLEAN_PEAKS (
+                MACS2_CONSENSUS.out.boolean_txt.join(HOMER_ANNOTATEPEAKS_CONSENSUS.out.txt, by: [0])
+            )
+            ch_versions = ch_versions.mix(ANNOTATE_BOOLEAN_PEAKS.out.versions)
+        }
+
+        // Create channels: [ antibody, [ ip_bams ] ]
+        ch_ip_control_bam
+            .map {
+                meta, ip_bam, control_bam ->
+                    [ meta.antibody, ip_bam ]
+            }
+            .groupTuple()
+            .set { ch_antibody_bams }
+
+        // Create channels: [ meta, [ ip_bams ], saf ]
+        // The consensus meta id is the antibody, so it serves as the join key.
+        MACS2_CONSENSUS
+            .out
+            .saf
+            .map {
+                meta, saf ->
+                    [ meta.id, meta, saf ]
+            }
+            .join(ch_antibody_bams)
+            .map {
+                antibody, meta, saf, bams ->
+                    [ meta, bams.flatten().sort(), saf ]
+            }
+            .set { ch_saf_bams }
+
+        //
+        // MODULE: Quantify peaks across samples with featureCounts
+        //
+        SUBREAD_FEATURECOUNTS (
+            ch_saf_bams
+        )
+        ch_subreadfeaturecounts_multiqc = SUBREAD_FEATURECOUNTS.out.summary
+        ch_versions = ch_versions.mix(SUBREAD_FEATURECOUNTS.out.versions.first())
+
+        if (!params.skip_deseq2_qc) {
+            //
+            // MODULE: Generate QC plots with DESeq2
+            //
+            DESEQ2_QC (
+                SUBREAD_FEATURECOUNTS.out.counts,
+                ch_deseq2_pca_header,
+                ch_deseq2_clustering_header
+            )
+            ch_deseq2_pca_multiqc        = DESEQ2_QC.out.pca_multiqc
+            ch_deseq2_clustering_multiqc = DESEQ2_QC.out.dists_multiqc
+        }
+    }
+
+    //
+    // MODULE: Create IGV session
+    //
+    if (!params.skip_igv) {
+        IGV (
+            params.aligner,
+            params.narrow_peak ? 'narrowPeak' : 'broadPeak',
+            PREPARE_GENOME.out.fasta,
+            UCSC_BEDGRAPHTOBIGWIG.out.bigwig.collect{it[1]}.ifEmpty([]),
+            ch_macs2_peaks.collect{it[1]}.ifEmpty([]),
+            ch_macs2_consensus_bed_lib.collect{it[1]}.ifEmpty([]),
+            ch_macs2_consensus_txt_lib.collect{it[1]}.ifEmpty([])
+        )
+        ch_versions = ch_versions.mix(IGV.out.versions)
+    }
+
+    //
+    // MODULE: Pipeline reporting
+    //
+    CUSTOM_DUMPSOFTWAREVERSIONS (
+        ch_versions.unique().collectFile(name: 'collated_versions.yml')
+    )
+
+    //
+    // MODULE: MultiQC
+    //
+    if (!params.skip_multiqc) {
+        workflow_summary    = WorkflowChipseq.paramsSummaryMultiqc(workflow, summary_params)
+        ch_workflow_summary = Channel.value(workflow_summary)
+
+        // Each optional input falls back to an empty list when its stage was skipped
+        MULTIQC (
+            ch_multiqc_config,
+            ch_multiqc_custom_config.collect().ifEmpty([]),
+            CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect(),
+            ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'),
+
+            FASTQC_TRIMGALORE.out.fastqc_zip.collect{it[1]}.ifEmpty([]),
+            FASTQC_TRIMGALORE.out.trim_zip.collect{it[1]}.ifEmpty([]),
+            FASTQC_TRIMGALORE.out.trim_log.collect{it[1]}.ifEmpty([]),
+
+            ch_samtools_stats.collect{it[1]}.ifEmpty([]),
+            ch_samtools_flagstat.collect{it[1]}.ifEmpty([]),
+            ch_samtools_idxstats.collect{it[1]}.ifEmpty([]),
+
+            MARK_DUPLICATES_PICARD.out.stats.collect{it[1]}.ifEmpty([]),
+            MARK_DUPLICATES_PICARD.out.flagstat.collect{it[1]}.ifEmpty([]),
+            MARK_DUPLICATES_PICARD.out.idxstats.collect{it[1]}.ifEmpty([]),
+            MARK_DUPLICATES_PICARD.out.metrics.collect{it[1]}.ifEmpty([]),
+
+            FILTER_BAM_BAMTOOLS.out.stats.collect{it[1]}.ifEmpty([]),
+            FILTER_BAM_BAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]),
+            FILTER_BAM_BAMTOOLS.out.idxstats.collect{it[1]}.ifEmpty([]),
+            ch_picardcollectmultiplemetrics_multiqc.collect{it[1]}.ifEmpty([]),
+
+            ch_preseq_multiqc.collect{it[1]}.ifEmpty([]),
+
+            ch_deeptoolsplotprofile_multiqc.collect{it[1]}.ifEmpty([]),
+            ch_deeptoolsplotfingerprint_multiqc.collect{it[1]}.ifEmpty([]),
+
+            PHANTOMPEAKQUALTOOLS.out.spp.collect{it[1]}.ifEmpty([]),
+            MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.nsc.collect{it[1]}.ifEmpty([]),
+            MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.rsc.collect{it[1]}.ifEmpty([]),
+            MULTIQC_CUSTOM_PHANTOMPEAKQUALTOOLS.out.correlation.collect{it[1]}.ifEmpty([]),
+
+            ch_custompeaks_frip_multiqc.collect{it[1]}.ifEmpty([]),
+            ch_custompeaks_count_multiqc.collect{it[1]}.ifEmpty([]),
+            ch_plothomerannotatepeaks_multiqc.collect().ifEmpty([]),
+            ch_subreadfeaturecounts_multiqc.collect{it[1]}.ifEmpty([]),
+
+            ch_deseq2_pca_multiqc.collect().ifEmpty([]),
+            ch_deseq2_clustering_multiqc.collect().ifEmpty([])
+        )
+        // Stored for the workflow.onComplete handler
+        multiqc_report = MULTIQC.out.report.toList()
+    }
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    COMPLETION EMAIL AND SUMMARY
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+// On completion: send the e-mail report only when an address was configured,
+// then always log the run summary.
+workflow.onComplete {
+    if (params.email || params.email_on_fail) {
+        NfcoreTemplate.email(workflow, params, summary_params, projectDir, log, multiqc_report)
+    }
+    NfcoreTemplate.summary(workflow, params, log)
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/