Skip to content

Commit

Permalink
Merge pull request #196 from uclahs-cds/sfitz-prep-for-plot
Browse files Browse the repository at this point in the history
Sfitz prep for plot
  • Loading branch information
sorelfitzgibbon authored Jul 21, 2023
2 parents ddf386c + a0c532a commit e1aa7d9
Show file tree
Hide file tree
Showing 17 changed files with 64 additions and 59 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- Add regions filter to variant intersections, limiting output to main chromosome variants
- Add second BCFtools step to create full presence/absence variant table (including private)
- Add workflow to create a `consensus.vcf` that includes SNVs found by two or more variant callers
- Add `fix_sample_names_VCF`, tumor and normal sample IDs from input BAMs used in output VCFs
- Add `split_VCF_bcftools` to `Mutect2` workflow, separating SNVs, MNVs and Indels

### Changed
- Reconfigure `call_region` to `intersect_regions`
- Update to BCFtools v1.17
- Keep `bam-readcount` output in `SomaticSniper` QC folder
- Update `MuSE` to `v2.0.2`
- Update to use sample ID from input BAM files (single tumor/normal BAM input only)
Expand Down
2 changes: 1 addition & 1 deletion config/default.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ params {
strelka2_version = "2.9.10"
manta_version = "1.6.0"
MuSE_version = "2.0.2"
BCFtools_version = "1.15.1"
BCFtools_version = "1.17"
docker_image_samtools = "${-> params.docker_container_registry}/samtools:${params.samtools_version}"
docker_image_validate_params = "${-> params.docker_container_registry}/pipeval:${params.pipeval_version}"
docker_image_GATK = "broadinstitute/gatk:${params.GATK_version}"
Expand Down
12 changes: 3 additions & 9 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,8 @@ methods {
}
}

set_strelka2_params = {
if (params.containsKey("call_region") && params.call_region) {
params.use_call_region = true
} else {
params.call_region = "${params.work_dir}/NO_FILE.bed.gz"
params.use_call_region = false
}
params.call_region_index = "${params.call_region}.tbi"
set_intersect_regions_params = {
params.intersect_regions_index = "${params.intersect_regions}.tbi"
}

set_mutect2_params = {
Expand Down Expand Up @@ -198,7 +192,7 @@ methods {
retry.setup_retry()
methods.set_env()
methods.set_sample_params()
methods.set_strelka2_params()
methods.set_intersect_regions_params()
methods.set_mutect2_params()
methods.set_output_directory()
methods.set_pipeline_log()
Expand Down
11 changes: 7 additions & 4 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,14 @@ exome:
required: false
default: false
help: 'The exome option when running manta and strelka2'
call_region:
intersect_regions:
type: 'Path'
required: false
default: '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
help: 'A call region bed file for strelka2 to save runtime or for a targeted region'
required: true
help: 'call regions bed file used by mutect2, strelka2 and intersect'
use_intersect_regions:
type: 'Bool'
required: true
default: true
split_intervals_extra_args:
type: 'String'
required: false
Expand Down
3 changes: 2 additions & 1 deletion config/template.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ includeConfig "${projectDir}/config/methods.config"
params {
algorithm = [] // 'somaticsniper', 'strelka2', 'mutect2', 'muse'
reference = ''
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
output_dir = ''
dataset_id = ''
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
Expand All @@ -25,7 +26,7 @@ params {
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down
14 changes: 6 additions & 8 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ log.info """\
reference: ${params.reference}
reference_index: ${params.reference_index}
reference_dict: ${params.reference_dict}
call_region: ${params.call_region}
intersect_regions: ${params.intersect_regions}
- output:
output_dir: ${params.output_dir_base}
Expand Down Expand Up @@ -132,14 +132,12 @@ workflow {
file_to_validate = reference_ch
.mix (tumor_input.tumor_bam, tumor_input.tumor_index, normal_input.normal_bam, normal_input.normal_index)
}
if (params.use_call_region) {
file_to_validate = file_to_validate.mix(
Channel.from(
params.call_region,
params.call_region_index
)
file_to_validate = file_to_validate.mix(
Channel.from(
params.intersect_regions,
params.intersect_regions_index
)
}
)
run_validate_PipeVal(file_to_validate)
run_validate_PipeVal.out.validation_result.collectFile(
name: 'input_validation.txt', newLine: true,
Expand Down
2 changes: 1 addition & 1 deletion metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ Maintainers: ['[email protected]']
Contributors: ['Mao Tian', 'Bugh Caden', 'Helena Winata', 'Yash Patel', 'Sorel Fitz-Gibbon']
Languages: ['Docker', 'Nextflow']
Dependencies: ['Docker', 'Nextflow']
Tools: ['GATK 4.4.0.0', 'SomaticSniper v1.0.5.0', 'SAMtools v1.16.1', 'Strelka2 v2.9.10', 'Manta v1.6.0', 'MuSE v2.0.2', BCFtools v1.15.1]
Tools: ['GATK 4.4.0.0', 'SomaticSniper v1.0.5.0', 'SAMtools v1.16.1', 'Strelka2 v2.9.10', 'Manta v1.6.0', 'MuSE v2.0.2', BCFtools v1.17]
15 changes: 12 additions & 3 deletions module/intersect-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ log.info """\
====================================
Docker Images:
- docker_image_BCFtools: ${params.docker_image_BCFtools}
"""
"""
process intersect_VCFs_BCFtools {
container params.docker_image_BCFtools
publishDir path: "${params.workflow_output_dir}/output",
Expand All @@ -14,6 +14,9 @@ process intersect_VCFs_BCFtools {
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "isec-2-or-more"
publishDir path: "${params.workflow_output_dir}/output",
mode: "copy",
pattern: "isec-1-or-more/*.txt"
publishDir path: "${params.workflow_log_output_dir}",
mode: "copy",
pattern: ".command.*",
Expand All @@ -22,20 +25,26 @@ process intersect_VCFs_BCFtools {
input:
path vcfs
path indices
path intersect_regions
path intersect_regions_index

output:
path "*.vcf.gz", emit: consensus_vcf
path "*.vcf.gz.tbi", emit: consensus_idx
path ".command.*"
path "isec-2-or-more"
path "isec-1-or-more/sites.txt"
path "isec-1-or-more/README.txt"

script:

vcf_list = vcfs.join(' ')

"""
set -euo pipefail
# intersect keeping only variants that are present in at least 2 VCFs
# Use README.txt to rename output files to include sample names
bcftools isec --nfiles +2 --output-type z --prefix isec-2-or-more ${vcf_list}
awk '/Using the following file names:/{x=1;next} x' isec-2-or-more/README.txt | sed 's/.vcf.gz\$/-consensus-variants.vcf.gz/' | while read a b c d; do mv \$a \$d ; mv \$a.tbi \$d.tbi ; done
# intersect, keeping all variants, to create presence/absence list of variants in each VCF
bcftools isec --output-type z --prefix isec-1-or-more --regions-file ${intersect_regions} ${vcf_list}
"""
}
19 changes: 6 additions & 13 deletions module/intersect.nf
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
include { generate_sha512sum } from './common'
include { intersect_VCFs_BCFtools } from './intersect-processes.nf'
include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams(
options: [
output_dir: params.workflow_output_dir,
log_output_dir: params.workflow_log_output_dir,
bgzip_extra_args: params.bgzip_extra_args,
tabix_extra_args: params.tabix_extra_args
])


workflow intersect {
take:
Expand All @@ -17,15 +9,16 @@ workflow intersect {
main:
intersect_VCFs_BCFtools(
tool_vcfs,
tool_indices
)
tool_indices,
params.intersect_regions,
params.intersect_regions_index
)
file_for_sha512 = intersect_VCFs_BCFtools.out.consensus_vcf
.flatten()
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]}
.mix(intersect_VCFs_BCFtools.out.consensus_idx
.flatten()
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]})
.map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]}
)
generate_sha512sum(file_for_sha512)
emit:
intersect_VCFs_BCFtools.out.consensus_vcf
}
3 changes: 1 addition & 2 deletions module/mutect2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ workflow mutect2 {
if (params.intervals) {
intervals = params.intervals
} else {
intervals = "${projectDir}/config/hg38_chromosomes_canonical.list"

intervals = "${projectDir}/config/hg38_chromosomes_canonical.bed"
// process non-canonical chromosome regions separately
// as this region requires more memory than the canonical regions
call_sSNVInNonAssembledChromosomes_Mutect2(
Expand Down
12 changes: 6 additions & 6 deletions module/strelka2-processes.nf
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ process call_sIndel_Manta {
path normal_index
path reference
path reference_index
path call_region
path call_region_index
path intersect_regions
path intersect_regions_index

output:
tuple path("MantaWorkflow/results/variants/candidateSmallIndels.vcf.gz"),
Expand All @@ -38,7 +38,7 @@ process call_sIndel_Manta {

script:
exome = params.exome ? "--exome" : ""
call_region_command = params.use_call_region ? "--callRegions ${call_region}" : ""
call_region_command = params.use_intersect_regions ? "--callRegions ${intersect_regions}" : ""
"""
configManta.py \
--normalBam $normal \
Expand Down Expand Up @@ -71,8 +71,8 @@ process call_sSNV_Strelka2 {
path reference
path reference_index
tuple path(indel_candidates), path(indel_candidates_index)
path call_region
path call_region_index
path intersect_regions
path intersect_regions_index

output:
tuple val("SNV"), path("StrelkaSomaticWorkflow/results/variants/somatic.snvs.vcf.gz"), emit: snvs_vcf
Expand All @@ -82,7 +82,7 @@ process call_sSNV_Strelka2 {

script:
exome = params.exome ? "--exome" : ""
call_region_command = params.use_call_region ? "--callRegions ${call_region}" : ""
call_region_command = params.use_intersect_regions ? "--callRegions ${intersect_regions}" : ""
"""
set -euo pipefail
configureStrelkaSomaticWorkflow.py \
Expand Down
9 changes: 4 additions & 5 deletions module/strelka2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ workflow strelka2 {
normal_index,
params.reference,
"${params.reference}.fai",
params.call_region,
params.call_region_index
params.intersect_regions,
params.intersect_regions_index
)
call_sSNV_Strelka2(
tumor_bam,
Expand All @@ -37,8 +37,8 @@ workflow strelka2 {
params.reference,
"${params.reference}.fai",
call_sIndel_Manta.out[0],
params.call_region,
params.call_region_index
params.intersect_regions,
params.intersect_regions_index
)
filter_VCF_BCFtools(call_sSNV_Strelka2.out.snvs_vcf
.mix(call_sSNV_Strelka2.out.indels_vcf))
Expand All @@ -58,4 +58,3 @@ workflow strelka2 {
.map{ it -> ["${it[2]}"] }

}

5 changes: 3 additions & 2 deletions test/config/a_mini-all-tools.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@ includeConfig "${projectDir}/config/methods.config"
params {
algorithm = ['somaticsniper', 'strelka2', 'mutect2', 'muse']
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
dataset_id = 'TWGSAMIN'
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
// set params.exome to TRUE will add the '-E' option when running MuSE
exome = true
save_intermediate_files = true
save_intermediate_files = false

// module options
bgzip_extra_args = ''
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down
3 changes: 2 additions & 1 deletion test/config/a_mini-muse.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ includeConfig "${projectDir}/config/methods.config"
params {
algorithm = ['muse'] // 'somaticsniper', 'strelka2', 'mutect2', 'muse'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
dataset_id = 'TWGSAMIN'
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
// set params.exome to TRUE will add the '-E' option when running MuSE
Expand All @@ -24,7 +25,7 @@ params {
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down
3 changes: 2 additions & 1 deletion test/config/a_mini-mutect2.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ includeConfig "${projectDir}/config/methods.config"
params {
algorithm = ['mutect2'] // 'somaticsniper', 'strelka2', 'mutect2', 'muse'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
dataset_id = 'TWGSAMIN'
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
// set params.exome to TRUE will add the '-E' option when running MuSE
Expand All @@ -24,7 +25,7 @@ params {
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down
3 changes: 2 additions & 1 deletion test/config/a_mini-somaticsniper.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ includeConfig "${projectDir}/config/methods.config"
params {
algorithm = ['somaticsniper'] // 'somaticsniper', 'strelka2', 'mutect2', 'muse'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
dataset_id = 'TWGSAMIN'
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
// set params.exome to TRUE will add the '-E' option when running MuSE
Expand All @@ -24,7 +25,7 @@ params {
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down
3 changes: 2 additions & 1 deletion test/config/a_mini-strelka2.config
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ params {
algorithm = ['strelka2'] // 'somaticsniper', 'strelka2', 'mutect2', 'muse'
reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta'
dataset_id = 'TWGSAMIN'
intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/hg38_chromosomes_canonical.bed.gz'
// set params.exome to TRUE will add the '--exome' option when running manta and strelka2
// set params.exome to TRUE will add the '-E' option when running MuSE
exome = true
Expand All @@ -24,7 +25,7 @@ params {
tabix_extra_args = ''

// strelka2 options
call_region = '/hot/ref/tool-specific-input/Strelka2/GRCh38/strelka2_call_region.bed.gz'
use_intersect_regions = true

// mutect2 options
split_intervals_extra_args = ''
Expand Down

0 comments on commit e1aa7d9

Please sign in to comment.