From 4a3631510b23efbf5a9cdd2490ec202aaafc6804 Mon Sep 17 00:00:00 2001 From: Arjun Arkal Rao Date: Tue, 15 May 2018 18:16:57 -0700 Subject: [PATCH] Add fusion filters for transgene (resolves #237) resolves #237 Added all fusion parameters to the input config so users can control what fusion filters are applied. --- MANUAL.md | 5 +++++ src/protect/mutation_translation.py | 9 +++++++++ src/protect/pipeline/defaults.yaml | 5 +++++ src/protect/pipeline/input_parameters.yaml | 5 +++++ 4 files changed, 24 insertions(+) diff --git a/MANUAL.md b/MANUAL.md index b023baad..fa148136 100644 --- a/MANUAL.md +++ b/MANUAL.md @@ -414,6 +414,11 @@ be substituted with S3 links. Descriptions for creating all files can be found i gencode_transcript_fasta : /path/to/gencode_transcripts.faa -> The transcript file for the gencode gtf. gencode_annotation_gtf : /path/to/gencode_annotation.gtf -> The gencode genome annotation file. genome_fasta : /path/to/hg19.faa -> The gencode genome fasta file + filter_mt_fusions: True -> Switch to filter mitochondrial gene pairs + filter_ig_pairs: True -> Switch to filter immunoglobulin gene pairs + filter_rna_gene_fusions: True -> Switch to filter rna-gene pairs + filter_readthroughs: True -> Switch to filter readthroughs + readthrough_threshold: 500000 -> Threshold below which pairs will be called readthroughs version: 2.2.2 haplotyping: diff --git a/src/protect/mutation_translation.py b/src/protect/mutation_translation.py index 68e76221..37851b4d 100644 --- a/src/protect/mutation_translation.py +++ b/src/protect/mutation_translation.py @@ -110,6 +110,15 @@ def run_transgene(job, snpeffed_file, rna_bam, univ_options, transgene_options, fusion_files = {key: docker_path(path) for key, path in fusion_files.items()} parameters += ['--transcripts', fusion_files['transcripts.fa'], '--fusions', fusion_files['fusion_calls']] + if transgene_options['filter_mt_fusions'] is True: + parameters.append('--filter_mt') + if transgene_options['filter_ig_pairs'] is 
True: + parameters.append('--filter_ig') + if transgene_options['filter_rna_gene_fusions'] is True: + parameters.append('--filter_rg') + if transgene_options['filter_readthroughs'] is True: + parameters.append('--filter_rt') + parameters.extend(['--rt_threshold', str(transgene_options['readthrough_threshold'])]) docker_call(tool='transgene', tool_parameters=parameters, diff --git a/src/protect/pipeline/defaults.yaml b/src/protect/pipeline/defaults.yaml index 145d2fa4..fb81538f 100644 --- a/src/protect/pipeline/defaults.yaml +++ b/src/protect/pipeline/defaults.yaml @@ -88,6 +88,11 @@ mutation_annotation: mutation_translation: transgene: + filter_mt_fusions: True + filter_ig_pairs: True + filter_rna_gene_fusions: True + filter_readthroughs: True + readthrough_threshold: 500000 version: 2.5.0 haplotyping: diff --git a/src/protect/pipeline/input_parameters.yaml b/src/protect/pipeline/input_parameters.yaml index 5dc5943e..9adb485a 100644 --- a/src/protect/pipeline/input_parameters.yaml +++ b/src/protect/pipeline/input_parameters.yaml @@ -133,6 +133,11 @@ mutation_translation: gencode_peptide_fasta : S3://protect-data/hg38_references/gencode.v25.pc_translations_NOPARY.fa.tar.gz gencode_transcript_fasta : S3://protect-data/hg38_references/gencode.v25.pc_transcripts_NOPARY.fa.tar.gz gencode_annotation_gtf : S3://protect-data/hg38_references/gencode.v25.annotation_NOPARY.gtf.tar.gz + filter_mt_fusions: True + filter_ig_pairs: True + filter_rna_gene_fusions: True + filter_readthroughs: True + readthrough_threshold: 500000 genome_fasta : S3://protect-data/hg38_references/hg38.fa.tar.gz # version: 2.2.2