diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..adcbd10f --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,4 @@ +# Test CODEOWNERS +# Default owner(s) +* @tyamaguchi-ucla @yashpatel6 @maotian06 @sorelfitzgibbon @uclahs-cds/nextflow-wg +# Folder specific owner diff --git a/CHANGELOG.md b/CHANGELOG.md index e388bef2..da2c971f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add .github/CODEOWNERS - Add check for MuSE or Mutect2 on F2 node ### Changed +- Reorder all VCFs before intersection +- Move `filter_VCF_BCFtools` to `common.nf` - Fix blarchive compression log output directory - Delay readcount compression until original file is no longer needed diff --git a/main.nf b/main.nf index 5444bec7..44df748f 100755 --- a/main.nf +++ b/main.nf @@ -174,10 +174,10 @@ workflow { } // Set empty channels so any unused tools don't cause failure at intersect step - Channel.empty().set { somaticsniper_vcf_ch } - Channel.empty().set { strelka2_vcf_ch } - Channel.empty().set { mutect2_vcf_ch } - Channel.empty().set { muse_vcf_ch } + Channel.empty().set { somaticsniper_gzvcf_ch } + Channel.empty().set { strelka2_gzvcf_ch } + Channel.empty().set { mutect2_gzvcf_ch } + Channel.empty().set { muse_gzvcf_ch } Channel.empty().set { somaticsniper_idx_ch } Channel.empty().set { strelka2_idx_ch } @@ -193,7 +193,7 @@ workflow { run_GetSampleName_Mutect2_normal.out.name_ch, run_GetSampleName_Mutect2_tumor.out.name_ch ) - somaticsniper.out.vcf.set { somaticsniper_vcf_ch } + somaticsniper.out.gzvcf.set { somaticsniper_gzvcf_ch } somaticsniper.out.idx.set { somaticsniper_idx_ch } } if ('strelka2' in params.algorithm) { @@ -205,7 +205,7 @@ workflow { run_GetSampleName_Mutect2_normal.out.name_ch, run_GetSampleName_Mutect2_tumor.out.name_ch ) - strelka2.out.vcf.set { strelka2_vcf_ch } + strelka2.out.gzvcf.set { strelka2_gzvcf_ch } 
strelka2.out.idx.set { strelka2_idx_ch } } if ('mutect2' in params.algorithm) { @@ -216,7 +216,7 @@ workflow { normal_input.normal_index.collect(), tumor_input.contamination_est.collect() ) - mutect2.out.vcf.set { mutect2_vcf_ch } + mutect2.out.gzvcf.set { mutect2_gzvcf_ch } mutect2.out.idx.set { mutect2_idx_ch } } if ('muse' in params.algorithm) { @@ -228,30 +228,28 @@ workflow { run_GetSampleName_Mutect2_normal.out.name_ch, run_GetSampleName_Mutect2_tumor.out.name_ch ) - muse.out.vcf.set { muse_vcf_ch } + muse.out.gzvcf.set { muse_gzvcf_ch } muse.out.idx.set { muse_idx_ch } } // Intersect all vcf files if (params.algorithm.size() > 1) { - tool_vcfs = (somaticsniper_vcf_ch - .mix(strelka2_vcf_ch) - .mix(mutect2_vcf_ch) - .mix(muse_vcf_ch)) + tool_gzvcfs = (somaticsniper_gzvcf_ch + .mix(strelka2_gzvcf_ch) + .mix(mutect2_gzvcf_ch) + .mix(muse_gzvcf_ch)) .collect() - tool_indices = (somaticsniper_idx_ch .mix(strelka2_idx_ch) .mix(mutect2_idx_ch) .mix(muse_idx_ch)) .collect() - intersect( - tool_vcfs, + tool_gzvcfs, tool_indices, script_dir_ch, - run_GetSampleName_Mutect2_normal.out.name_ch, - run_GetSampleName_Mutect2_tumor.out.name_ch + run_GetSampleName_Mutect2_normal.out.name_ch.first(), + run_GetSampleName_Mutect2_tumor.out.name_ch.first() ) } } diff --git a/module/common.nf b/module/common.nf index a63ca7d2..4534fe6e 100644 --- a/module/common.nf +++ b/module/common.nf @@ -8,6 +8,31 @@ Docker Images: - docker_image_blarchive: ${params.docker_image_blarchive} """ +process filter_VCF_BCFtools { + container params.docker_image_BCFtools + publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", + mode: "copy", + pattern: "*.vcf.gz", + enabled: params.save_intermediate_files + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" } + + input: + tuple val(var_type), path(vcf) + + output: + tuple val(var_type), 
path("*.vcf.gz"), emit: gzvcf + path ".command.*" + + script: + """ + set -euo pipefail + bcftools view -f PASS --output-type z --output ${params.output_filename}_${var_type}-pass.vcf.gz ${vcf} + """ + } + process generate_sha512sum { container params.docker_image_validate_params publishDir path: "${params.workflow_output_dir}/output", @@ -52,7 +77,7 @@ process rename_samples_BCFtools { tuple val(var_type), path(vcf) output: - tuple val(var_type), path("*.vcf.gz"), emit: fix_vcf + tuple val(var_type), path("*.vcf.gz"), emit: gzvcf path ".command.*" path "*_samples.txt" diff --git a/module/intersect-processes.nf b/module/intersect-processes.nf index 6fac58de..d2e84f2b 100644 --- a/module/intersect-processes.nf +++ b/module/intersect-processes.nf @@ -10,6 +10,36 @@ Intersect Options: - vcf2maf_extra_args: ${params.vcf2maf_extra_args} ==================================== """ +process reorder_samples_BCFtools { + container params.docker_image_BCFtools + publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", + mode: "copy", + pattern: "*.vcf.gz", + enabled: params.save_intermediate_files + publishDir path: "${params.workflow_log_output_dir}", + mode: "copy", + pattern: ".command.*", + saveAs: { "${task.process.split(':')[-1]}-${algorithm}/log${file(it).getName()}" } + + input: + tuple val(algorithm), path(gzvcf) + path indices + val normal_id + val tumor_id + + output: + path "*-reorder.vcf.gz", emit: gzvcf + path ".command.*" + + script: + """ + set -euo pipefail + infile=\$(basename ${gzvcf} .vcf.gz) + outfile="\${infile}-reorder.vcf.gz" + bcftools view -s ${normal_id},${tumor_id} --output \${outfile} ${gzvcf} + """ + } + process intersect_VCFs_BCFtools { container params.docker_image_BCFtools publishDir path: "${params.workflow_output_dir}/output", @@ -29,20 +59,20 @@ process intersect_VCFs_BCFtools { saveAs: { "${task.process.split(':')[-1]}/log${file(it).getName()}" } input: - path vcfs + path gzvcf path indices path
intersect_regions path intersect_regions_index output: - path "*.vcf.gz", emit: intersect_vcf - path "*.vcf.gz.tbi", emit: intersect_idx + path "*.vcf.gz", emit: gzvcf + path "*.vcf.gz.tbi", emit: idx path ".command.*" path "isec-2-or-more/*.txt" path "isec-1-or-more/*.txt", emit: isec script: - vcf_list = vcfs.join(' ') + vcf_list = gzvcf.join(' ') regions_command = params.use_intersect_regions ? "--regions-file ${intersect_regions}" : "" """ set -euo pipefail @@ -54,13 +84,13 @@ process intersect_VCFs_BCFtools { ${regions_command} \ ${vcf_list} awk '/Using the following file names:/{x=1;next} x' isec-2-or-more/README.txt \ - | sed 's/.vcf.gz\$/-intersect.vcf.gz/' \ + | sed 's/-reorder.vcf.gz\$/-intersect.vcf.gz/' \ | while read a b c d; do mv \$a \$d mv \$a.tbi \$d.tbi done # intersect, keeping all variants, to create presence/absence list of variants in each VCF - bcftools isec \ + bcftools isec --nfiles +1\ --output-type z \ --prefix isec-1-or-more \ ${regions_command} \ @@ -109,7 +139,7 @@ process concat_VCFs_BCFtools { path indices output: - path "*concat.vcf", emit: concat_vcf + path "*concat.vcf", emit: vcf path ".command.*" script: @@ -146,7 +176,7 @@ process convert_VCF_vcf2maf { val tumor_id output: - path "*.maf", emit: concat_maf + path "*.maf", emit: maf path ".command.*" script: diff --git a/module/intersect.nf b/module/intersect.nf index 94803ba3..83f6d86b 100644 --- a/module/intersect.nf +++ b/module/intersect.nf @@ -3,8 +3,16 @@ include { compress_file_blarchive} from './common' addParams( blarchive_publishDir : "${params.workflow_output_dir}/output", blarchive_enabled : true ) -include { intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; convert_VCF_vcf2maf } from './intersect-processes.nf' -include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( +include { reorder_samples_BCFtools; intersect_VCFs_BCFtools; plot_VennDiagram_R; concat_VCFs_BCFtools ; 
convert_VCF_vcf2maf } from './intersect-processes.nf' +include { compress_index_VCF as compress_index_VCF_reordered } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( + options: [ + output_dir: params.workflow_output_dir, + log_output_dir: params.workflow_log_output_dir, + bgzip_extra_args: params.bgzip_extra_args, + tabix_extra_args: params.tabix_extra_args, + is_output_file: false + ]) +include { compress_index_VCF as compress_index_VCF_concat } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, log_output_dir: params.workflow_log_output_dir, @@ -16,23 +24,45 @@ def sortVcfs(List paths) { def toolA = file(a).getName() def toolB = file(b).getName() return toolA.compareTo(toolB) + } + } +def getToolName(filename) { + return file(filename).getName().split('-')[0] } -} workflow intersect { take: - tool_vcfs + tool_gzvcfs tool_indices script_dir_ch normal_id tumor_id main: - vcfs_ch = tool_vcfs + tool_gzvcfs_ch = tool_gzvcfs + .flatten() + .map{ it -> [getToolName(it), it]} + tool_indices_ch = tool_indices + .flatten() + reorder_samples_BCFtools( + tool_gzvcfs_ch, + tool_indices_ch, + normal_id, + tumor_id + ) + compress_index_VCF_reordered(reorder_samples_BCFtools.out.gzvcf + .map{ it -> ["${getToolName(it)}-SNV", it]} + ) + gzvcfs = compress_index_VCF_reordered.out.index_out + .map{ it -> it[1] } + .collect() .map { sortVcfs(it) } + indices = compress_index_VCF_reordered.out.index_out + .map{ it -> it[2] } + .collect() intersect_VCFs_BCFtools( - vcfs_ch, - tool_indices, + gzvcfs, + indices, params.intersect_regions, params.intersect_regions_index ) @@ -40,35 +70,35 @@ workflow intersect { script_dir_ch, intersect_VCFs_BCFtools.out.isec, ) - intersect_vcfs_ch = intersect_VCFs_BCFtools.out.intersect_vcf + intersect_vcfs = intersect_VCFs_BCFtools.out.gzvcf .map { sortVcfs(it) } concat_VCFs_BCFtools( - intersect_vcfs_ch, - 
intersect_VCFs_BCFtools.out.intersect_idx + intersect_vcfs, + intersect_VCFs_BCFtools.out.idx ) convert_VCF_vcf2maf( - concat_VCFs_BCFtools.out.concat_vcf, + concat_VCFs_BCFtools.out.vcf, params.reference, normal_id, tumor_id ) - compress_index_VCF(concat_VCFs_BCFtools.out.concat_vcf + compress_index_VCF_concat(concat_VCFs_BCFtools.out.vcf .map{ it -> ['SNV', it]} ) - compress_file_blarchive(convert_VCF_vcf2maf.out.concat_maf + compress_file_blarchive(convert_VCF_vcf2maf.out.maf .map{ it -> ['MAF', it]} ) - file_for_sha512 = intersect_VCFs_BCFtools.out.intersect_vcf + file_for_sha512 = intersect_VCFs_BCFtools.out.gzvcf .flatten() - .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-vcf", it]} - .mix(intersect_VCFs_BCFtools.out.intersect_idx + .map{ it -> ["${getToolName(it)}-vcf", it]} + .mix(intersect_VCFs_BCFtools.out.idx .flatten() - .map{ it -> ["${file(it).getName().split('_')[0]}-SNV-idx", it]} + .map{ it -> ["${getToolName(it)}-idx", it]} ) - .mix(compress_index_VCF.out.index_out + .mix(compress_index_VCF_concat.out.index_out .map{ it -> ["concat-${it[0]}-vcf", it[1]] } ) - .mix(compress_index_VCF.out.index_out + .mix(compress_index_VCF_concat.out.index_out .map{ it -> ["concat-${it[0]}-index", it[2]] } ) .mix(compress_file_blarchive.out.compressed_file diff --git a/module/muse-processes.nf b/module/muse-processes.nf index 2a618362..33a10cb7 100644 --- a/module/muse-processes.nf +++ b/module/muse-processes.nf @@ -77,53 +77,3 @@ process run_sump_MuSE { -D $dbSNP """ } - -process filter_VCF_BCFtools { - container params.docker_image_BCFtools - publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.vcf.gz", - enabled: params.save_intermediate_files - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" } - - input: - tuple val(var_type), path(vcf) - - output: - tuple 
val(var_type), path("*.vcf.gz"), emit: pass_vcf - path ".command.*" - - script: - """ - set -euo pipefail - bcftools view -f PASS --output-type z --output ${params.output_filename}_${var_type}-pass.vcf.gz ${vcf} - """ - } - -process reorder_samples_BCFtools { - container params.docker_image_BCFtools - publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.vcf.gz", - enabled: params.save_intermediate_files - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" } - - input: - tuple val(var_type), path(vcf) - - output: - tuple val(var_type), path("*.vcf.gz"), emit: reorder_vcf - path ".command.*" - - script: - """ - set -euo pipefail - bcftools view -s NORMAL,TUMOR --output ${params.output_filename}_pass-reorder.vcf.gz ${vcf} - """ - } diff --git a/module/muse.nf b/module/muse.nf index 6bf5c9cf..8f45c39c 100644 --- a/module/muse.nf +++ b/module/muse.nf @@ -1,5 +1,5 @@ -include { call_sSNV_MuSE; run_sump_MuSE; filter_VCF_BCFtools; reorder_samples_BCFtools } from './muse-processes' -include { rename_samples_BCFtools; generate_sha512sum } from './common' +include { call_sSNV_MuSE; run_sump_MuSE } from './muse-processes' +include { filter_VCF_BCFtools; rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, @@ -31,14 +31,12 @@ workflow muse { "${params.dbSNP}.tbi" ) filter_VCF_BCFtools(run_sump_MuSE.out.vcf.map { it -> ['SNV', it] } ) - // MuSE output VCF has sample order: TUMOR NORMAL, opposite of all other tools. Need to reorder. 
- reorder_samples_BCFtools(filter_VCF_BCFtools.out.pass_vcf) - rename_samples_BCFtools(normal_id, tumor_id, reorder_samples_BCFtools.out.reorder_vcf) - compress_index_VCF(rename_samples_BCFtools.out.fix_vcf) + rename_samples_BCFtools(normal_id, tumor_id, filter_VCF_BCFtools.out.gzvcf) + compress_index_VCF(rename_samples_BCFtools.out.gzvcf) file_for_sha512 = compress_index_VCF.out.index_out.map{ it -> ["muse-${it[0]}-vcf", it[1]] } .mix(compress_index_VCF.out.index_out.map{ it -> ["muse-${it[0]}-index", it[2]] }) generate_sha512sum(file_for_sha512) emit: - vcf = compress_index_VCF.out.index_out.map{ it -> ["${it[1]}"] } + gzvcf = compress_index_VCF.out.index_out.map{ it -> ["${it[1]}"] } idx = compress_index_VCF.out.index_out.map{ it -> ["${it[2]}"] } } diff --git a/module/mutect2-processes.nf b/module/mutect2-processes.nf index 137d0438..529a8f9f 100644 --- a/module/mutect2-processes.nf +++ b/module/mutect2-processes.nf @@ -263,31 +263,6 @@ process run_FilterMutectCalls_GATK { """ } -process filter_VCF_BCFtools { - container params.docker_image_BCFtools - publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.vcf.gz", - enabled: params.save_intermediate_files - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" } - - input: - tuple val(var_type), path(vcf) - - output: - tuple val(var_type), path("*.vcf.gz"), emit: pass_vcf - path ".command.*" - - script: - """ - set -euo pipefail - bcftools view -f PASS --output-type z --output ${params.output_filename}_${var_type}-pass.vcf.gz ${vcf} - """ - } - process split_VCF_BCFtools { container params.docker_image_BCFtools publishDir path: "${params.workflow_output_dir}/output", @@ -303,7 +278,7 @@ process split_VCF_BCFtools { each var_type output: - tuple val(var_type), path("*.vcf.gz"), emit: split_vcf + tuple val(var_type), 
path("*.vcf.gz"), emit: gzvcf path ".command.*" script: diff --git a/module/mutect2.nf b/module/mutect2.nf index 3d0b92d1..fd245972 100644 --- a/module/mutect2.nf +++ b/module/mutect2.nf @@ -1,5 +1,5 @@ -include { run_GetSampleName_Mutect2; run_SplitIntervals_GATK; call_sSNV_Mutect2; run_MergeVcfs_GATK; run_MergeMutectStats_GATK; run_LearnReadOrientationModel_GATK; run_FilterMutectCalls_GATK; filter_VCF_BCFtools; split_VCF_BCFtools } from './mutect2-processes' -include { generate_sha512sum } from './common' +include { run_GetSampleName_Mutect2; run_SplitIntervals_GATK; call_sSNV_Mutect2; run_MergeVcfs_GATK; run_MergeMutectStats_GATK; run_LearnReadOrientationModel_GATK; run_FilterMutectCalls_GATK; split_VCF_BCFtools } from './mutect2-processes' +include { filter_VCF_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ output_dir: params.workflow_output_dir, @@ -92,13 +92,13 @@ workflow mutect2 { contamination_table.collect() ) filter_VCF_BCFtools(run_FilterMutectCalls_GATK.out.filtered.map{ it -> ['all', it] }) - split_VCF_BCFtools(filter_VCF_BCFtools.out.pass_vcf.map{ it -> it[1] }, ['snps', 'mnps', 'indels']) - compress_index_VCF(split_VCF_BCFtools.out.split_vcf) + split_VCF_BCFtools(filter_VCF_BCFtools.out.gzvcf.map{ it -> it[1] }, ['snps', 'mnps', 'indels']) + compress_index_VCF(split_VCF_BCFtools.out.gzvcf) file_for_sha512 = compress_index_VCF.out.index_out.map{ it -> ["mutect2-${it[0]}-vcf", it[1]] } .mix( compress_index_VCF.out.index_out.map{ it -> ["mutect2-${it[0]}-index", it[2]] } ) generate_sha512sum(file_for_sha512) emit: - vcf = compress_index_VCF.out.index_out + gzvcf = compress_index_VCF.out.index_out .filter { it[0] == 'snps' } .map{ it -> ["${it[1]}"] } idx = compress_index_VCF.out.index_out diff --git a/module/somaticsniper-processes.nf b/module/somaticsniper-processes.nf index f7b6f5b1..da2cfc21 100644 --- 
a/module/somaticsniper-processes.nf +++ b/module/somaticsniper-processes.nf @@ -280,8 +280,8 @@ process call_HighConfidenceSNV_SomaticSniper { path fp_pass output: - path "*_hc.vcf", emit: hc - path "*_lc.vcf", emit: lc + path "*_hc.vcf", emit: hc_vcf + path "*_lc.vcf" path ".command.*" """ diff --git a/module/somaticsniper.nf b/module/somaticsniper.nf index e6a34f5f..a4644c6f 100644 --- a/module/somaticsniper.nf +++ b/module/somaticsniper.nf @@ -62,11 +62,11 @@ workflow somaticsniper { .map{ it -> ['readcount', it[0]] } ) // rename_samples_BCFtools needs bgzipped input - compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc + compress_index_VCF_hc(call_HighConfidenceSNV_SomaticSniper.out.hc_vcf .map{ it -> ['SNV', it] }) rename_samples_BCFtools(normal_id, tumor_id, compress_index_VCF_hc.out.index_out .map{ it -> [it[0], it[1]] }) - compress_index_VCF_fix(rename_samples_BCFtools.out.fix_vcf) + compress_index_VCF_fix(rename_samples_BCFtools.out.gzvcf) file_for_sha512 = compress_index_VCF_fix.out.index_out .map{ it -> ["${it[0]}-vcf", it[1]] } .mix(compress_index_VCF_fix.out.index_out @@ -74,6 +74,6 @@ workflow somaticsniper { ) generate_sha512sum(file_for_sha512) emit: - vcf = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[1]}"] } + gzvcf = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[1]}"] } idx = compress_index_VCF_fix.out.index_out.map{ it -> ["${it[2]}"] } } diff --git a/module/strelka2-processes.nf b/module/strelka2-processes.nf index d3bb020d..427febfd 100644 --- a/module/strelka2-processes.nf +++ b/module/strelka2-processes.nf @@ -76,8 +76,8 @@ process call_sSNV_Strelka2 { path intersect_regions_index output: - tuple val("SNV"), path("StrelkaSomaticWorkflow/results/variants/somatic.snvs.vcf.gz"), emit: snvs_vcf - tuple val("Indel"), path("StrelkaSomaticWorkflow/results/variants/somatic.indels.vcf.gz"), emit: indels_vcf + tuple val("SNV"), path("StrelkaSomaticWorkflow/results/variants/somatic.snvs.vcf.gz"), emit: snvs_gzvcf + 
tuple val("Indel"), path("StrelkaSomaticWorkflow/results/variants/somatic.indels.vcf.gz"), emit: indels_gzvcf path "StrelkaSomaticWorkflow" path ".command.*" @@ -98,28 +98,3 @@ process call_sSNV_Strelka2 { StrelkaSomaticWorkflow/runWorkflow.py -m local -j ${task.cpus} """ } - -process filter_VCF_BCFtools { - container params.docker_image_BCFtools - publishDir path: "${params.workflow_output_dir}/intermediate/${task.process.split(':')[-1]}", - mode: "copy", - pattern: "*.vcf.gz", - enabled: params.save_intermediate_files - publishDir path: "${params.workflow_log_output_dir}", - mode: "copy", - pattern: ".command.*", - saveAs: { "${task.process.split(':')[-1]}-${var_type}/log${file(it).getName()}" } - - input: - tuple val(var_type), path(vcf) - - output: - tuple val(var_type), path("*.vcf.gz"), emit: pass_vcf - path ".command.*" - - script: - """ - set -euo pipefail - bcftools view -f PASS --output-type z --output ${params.output_filename}_${var_type}-pass.vcf.gz ${vcf} - """ - } diff --git a/module/strelka2.nf b/module/strelka2.nf index 429dc997..01424aa4 100644 --- a/module/strelka2.nf +++ b/module/strelka2.nf @@ -1,5 +1,5 @@ -include { call_sSNV_Strelka2; call_sIndel_Manta; filter_VCF_BCFtools } from './strelka2-processes' -include { rename_samples_BCFtools; generate_sha512sum } from './common' +include { call_sSNV_Strelka2; call_sIndel_Manta } from './strelka2-processes' +include { filter_VCF_BCFtools; rename_samples_BCFtools; generate_sha512sum } from './common' include { compress_index_VCF } from '../external/pipeline-Nextflow-module/modules/common/index_VCF_tabix/main.nf' addParams( options: [ @@ -40,21 +40,20 @@ workflow strelka2 { params.intersect_regions, params.intersect_regions_index ) - filter_VCF_BCFtools(call_sSNV_Strelka2.out.snvs_vcf - .mix(call_sSNV_Strelka2.out.indels_vcf)) - normal_id.combine(filter_VCF_BCFtools.out.pass_vcf).map{ it[0] }.set{ normal_id_fix } - tumor_id.combine(filter_VCF_BCFtools.out.pass_vcf).map{ it[0] }.set{ tumor_id_fix } - 
rename_samples_BCFtools(normal_id_fix, tumor_id_fix, filter_VCF_BCFtools.out.pass_vcf) - compress_index_VCF(rename_samples_BCFtools.out.fix_vcf) + filter_VCF_BCFtools(call_sSNV_Strelka2.out.snvs_gzvcf + .mix(call_sSNV_Strelka2.out.indels_gzvcf)) + normal_id.combine(filter_VCF_BCFtools.out.gzvcf).map{ it[0] }.set{ normal_id_fix } + tumor_id.combine(filter_VCF_BCFtools.out.gzvcf).map{ it[0] }.set{ tumor_id_fix } + rename_samples_BCFtools(normal_id_fix, tumor_id_fix, filter_VCF_BCFtools.out.gzvcf) + compress_index_VCF(rename_samples_BCFtools.out.gzvcf) file_for_sha512 = compress_index_VCF.out.index_out.map{ it -> ["strelka2-${it[0]}-vcf", it[1]] } .mix( compress_index_VCF.out.index_out.map{ it -> ["strelka2-${it[0]}-index", it[2]] } ) generate_sha512sum(file_for_sha512) emit: - vcf = compress_index_VCF.out.index_out + gzvcf = compress_index_VCF.out.index_out .filter { it[0] == 'SNV' } .map{ it -> ["${it[1]}"] } idx = compress_index_VCF.out.index_out .filter { it[0] == 'SNV' } .map{ it -> ["${it[2]}"] } - } diff --git a/nftest.yml b/nftest.yml index 04c206e8..3e031f17 100644 --- a/nftest.yml +++ b/nftest.yml @@ -130,6 +130,62 @@ cases: expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-mutect2-multiple-samples/TWGSAMIN000001/Mutect2-4.4.0.0/Mutect2-4.4.0.0_TWGSAMIN_TWGSAMIN000001_SNV.vcf.gz script: test/assert_vcf.sh + - name: a_mini-two-tools + message: test development branch using somaticsniper and strelka2 with standard paired tumor and normal a-mini-n2 input + nf_script: ./main.nf + nf_config: ./test/config/a_mini-two-tools.config + params_file: ./test/yaml/a_mini_n2-std-input.yaml + skip: false + verbose: true + asserts: + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/SomaticSniper-1.0.5.0/output/SomaticSniper-1.0.5.0_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV.vcf.gz + expect: 
/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/SomaticSniper-1.0.5.0/SomaticSniper-1.0.5.0_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV.vcf.gz + script: test/assert_vcf.sh + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Strelka2-2.9.10/output/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_Indel.vcf.gz + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Strelka2-2.9.10/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_Indel.vcf.gz + script: test/assert_vcf.sh + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Strelka2-2.9.10/output/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV.vcf.gz + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Strelka2-2.9.10/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV.vcf.gz + script: test/assert_vcf.sh + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-concat.vcf.gz + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-concat.vcf.gz + script: test/assert_vcf.sh + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-concat.maf.bz2 + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-concat.maf.bz2 + method: md5 + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_Venn-diagram.tiff + expect: 
/hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_Venn-diagram.tiff + method: md5 + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/isec-1-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_README.txt + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/isec-1-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_README.txt + method: md5 + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/isec-1-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_sites.txt + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/isec-1-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_sites.txt + method: md5 + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/isec-2-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_README.txt + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/isec-2-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_README.txt + method: md5 + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/isec-2-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_sites.txt + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/isec-2-or-more/BCFtools-1.17_TWGSAMIN_TWGSAMIN000001-T001-S01-F_sites.txt + method: md5 + + - actual: 
call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/SomaticSniper-1.0.5.0_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-intersect.vcf.gz + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/SomaticSniper-1.0.5.0_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-intersect.vcf.gz + script: test/assert_vcf.sh + + - actual: call-sSNV-7.0.0-rc.1/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/output/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-intersect.vcf.gz + expect: /hot/software/pipeline/pipeline-call-sSNV/Nextflow/development/test-output/a_mini-two-tools/TWGSAMIN000001-T001-S01-F/Intersect-BCFtools-1.17/Strelka2-2.9.10_TWGSAMIN_TWGSAMIN000001-T001-S01-F_SNV-intersect.vcf.gz + script: test/assert_vcf.sh + - name: a_mini-somaticsniper message: test development branch using somaticsniper and standard paired tumor and normal a-mini-n2 input nf_script: ./main.nf diff --git a/test/config/a_mini-two-tools.config b/test/config/a_mini-two-tools.config new file mode 100644 index 00000000..d1ca0db5 --- /dev/null +++ b/test/config/a_mini-two-tools.config @@ -0,0 +1,41 @@ +/** +* EXECUTION SETTINGS AND GLOBAL DEFAULTS +* External config files import. DO NOT MODIFY THIS LINE! 
+*/ +includeConfig "${projectDir}/config/default.config" +includeConfig "${projectDir}/nextflow.config" +includeConfig "${projectDir}/config/methods.config" + +/** +* Inputs/parameters of the pipeline +*/ + +params { + algorithm = ['somaticsniper', 'strelka2'] + reference = '/hot/ref/reference/GRCh38-BI-20160721/Homo_sapiens_assembly38.fasta' + intersect_regions = '/hot/ref/tool-specific-input/pipeline-call-sSNV-6.0.0/GRCh38-BI-20160721/Homo_sapiens_assembly38_no-decoy.bed.gz' + dataset_id = 'TWGSAMIN' + // setting params.exome to TRUE will add the '--exome' option when running manta and strelka2 and the -E option when running MuSE + exome = false + save_intermediate_files = false + + // module options + bgzip_extra_args = '' + tabix_extra_args = '' + + // mutect2 options + split_intervals_extra_args = '' + mutect2_extra_args = '' + filter_mutect_calls_extra_args = '' + gatk_command_mem_diff = 500.MB + scatter_count = 12 + germline_resource_gnomad_vcf = '/hot/ref/tool-specific-input/GATK/GRCh38/af-only-gnomad.hg38.vcf.gz' + + // MuSE options + dbSNP = '/hot/ref/database/dbSNP-155/original/GRCh38/GCF_000001405.39.gz' + + // Intersect options + ncbi_build = 'GRCh38' + vcf2maf_extra_args = '' +} +methods.setup()