26
26
import sys
27
27
28
28
from common import read_fasta , chrom_sort
29
- from fusion import get_transcriptome_data , read_fusions , insert_fusions , get_exons
29
+ from fusion import (get_exons ,
30
+ get_transcriptome_data ,
31
+ insert_fusions ,
32
+ read_fusions ,
33
+ read_genes_from_gtf )
30
34
31
35
32
36
# A named tuple describing the result of running reject_mutation on a mutation.
@@ -1036,15 +1040,13 @@ def main(params):
1036
1040
transcriptome , gene_transcript_ids = None , None
1037
1041
1038
1042
# Load data from fusion file
1043
+ fusions = exons = None
1039
1044
if params .fusion_file :
1040
- fusions = read_fusions (params .fusion_file )
1041
- else :
1042
- fusions = None
1043
-
1044
- if params .genome_file and params .annotation_file :
1045
- exons = get_exons (params .genome_file , params .annotation_file )
1046
- else :
1047
- exons = None
1045
+ gene_annotations = read_genes_from_gtf (params .annotation_file )
1046
+ fusions = read_fusions (params .fusion_file , gene_annotations , params .filter_mt ,
1047
+ params .filter_ig , params .filter_rg , params .filter_rt ,
1048
+ params .rt_threshold )
1049
+ exons = get_exons (params .genome_file , params .annotation_file , fusions )
1048
1050
1049
1051
for peplen in params .pep_lens .split (',' ):
1050
1052
logging .info ('Processing %s-mers' , peplen )
@@ -1057,7 +1059,8 @@ def main(params):
1057
1059
if proteome_data and snvs :
1058
1060
insert_snvs (proteome_data , snvs , tumfile , normfile , int (peplen ), params .rna_file )
1059
1061
if transcriptome and gene_transcript_ids and fusions :
1060
- insert_fusions (transcriptome , fusions , gene_transcript_ids , int (peplen ), tumfile , exons = exons )
1062
+ insert_fusions (transcriptome , fusions , gene_transcript_ids , int (peplen ), tumfile ,
1063
+ exons = exons )
1061
1064
1062
1065
if params .no_json_dumps :
1063
1066
shutil .move (tumfile_path , '_' .join ([params .prefix , 'tumor' , peplen ,
@@ -1080,36 +1083,36 @@ def run_transgene():
1080
1083
This will try to run transgene from system arguments
1081
1084
"""
1082
1085
parser = argparse .ArgumentParser (description = main .__doc__ )
1083
- parser . add_argument ( '--peptides' , dest = 'peptide_file' ,
1084
- type = argparse .FileType ('r' ),
1086
+ # SNV related options
1087
+ parser . add_argument ( '--peptides' , dest = 'peptide_file' , type = argparse .FileType ('r' ),
1085
1088
help = 'Path to GENCODE translation FASTA file' )
1086
- parser .add_argument ('--transcripts' , dest = 'transcript_file' ,
1087
- type = argparse .FileType ('r' ),
1088
- help = 'Path to GENCODE transcript FASTA file' )
1089
- parser .add_argument ('--snpeff' , dest = 'snpeff_file' ,
1090
- type = argparse .FileType ('r' ),
1089
+ parser .add_argument ('--snpeff' , dest = 'snpeff_file' , type = argparse .FileType ('r' ),
1091
1090
help = 'Path to snpeff file' )
1092
- parser .add_argument ('--cores' , dest = 'cores' , type = int ,
1093
- help = 'Number of cores to use for the filtering step.' , required = False ,
1094
- default = 1 )
1095
- parser .add_argument ('--fusions' , dest = 'fusion_file' ,
1096
- help = 'Path to gene fusion file' ,
1091
+
1092
+ # Fusion related options
1093
+ parser .add_argument ('--fusions' , dest = 'fusion_file' , help = 'Path to gene fusion file' ,
1097
1094
type = argparse .FileType ('r' ))
1095
+ parser .add_argument ('--transcripts' , dest = 'transcript_file' , type = argparse .FileType ('r' ),
1096
+ help = 'Path to GENCODE transcript FASTA file. Required if calling fusions.' )
1098
1097
parser .add_argument ('--genome' , dest = 'genome_file' ,
1099
- help = 'Path to reference genome file' ,
1098
+ help = 'Path to reference genome file, Required if calling fusions. ' ,
1100
1099
type = argparse .FileType ('r' ))
1101
1100
parser .add_argument ('--annotation' , dest = 'annotation_file' ,
1102
- help = 'Path to gencode annotation file' ,
1101
+ help = 'Path to gencode annotation file. Required if calling fusions. ' ,
1103
1102
type = argparse .FileType ('r' ))
1104
- parser .add_argument ('--prefix' , dest = 'prefix' , type = str ,
1105
- help = 'Prefix for output file names' , required = True )
1106
- parser .add_argument ('--pep_lens' , dest = 'pep_lens' , type = str ,
1107
- help = 'Desired peptide lengths to process. '
1108
- 'The argument should be in the form of comma separated values. '
1109
- 'E.g. 9,15' , required = False , default = '9,10,15' )
1110
- parser .add_argument ('--no_json_dumps' , action = 'store_true' ,
1111
- help = 'Do not educe peptide fasta record names in the output by dumping the '
1112
- 'mapping info into a .map json file.' , required = False , default = False )
1103
+ parser .add_argument ('--filter_mt_fusions' , dest = 'filter_mt' , action = 'store_true' ,
1104
+ help = 'Filter fusions involving Mitochondrial genes.' , required = False )
1105
+ parser .add_argument ('--filter_ig_pairs' , dest = 'filter_ig' , action = 'store_true' ,
1106
+ help = 'Filter fusions involving two immunoglobulin genes (IGXXX).' ,
1107
+ required = False )
1108
+ parser .add_argument ('--filter_rna_gene_fusions' , dest = 'filter_rg' , action = 'store_true' ,
1109
+ help = 'Filter fusions involving RNA genes (RP11-XXXX).' , required = False )
1110
+ parser .add_argument ('--filter_readthroughs' , dest = 'filter_rt' , action = 'store_true' ,
1111
+ help = 'Filter transcriptional read-troughs.' , required = False )
1112
+ parser .add_argument ('--readthrough_threshold' , dest = 'rt_threshold' , type = int ,
1113
+ help = 'Genomic distance between candidates on the same strand below which a '
1114
+ 'fusion will be considered a read-through.' , default = 500000 , required = False )
1115
+
1113
1116
# RNA-Aware options
1114
1117
parser .add_argument ('--rna_file' , dest = 'rna_file' , help = 'The path to an RNA-seq bam file. If '
1115
1118
'provided, the vcf will be filtered for coding mutations only. The file '
@@ -1122,6 +1125,7 @@ def run_transgene():
1122
1125
parser .add_argument ('--min_rna_alt_freq' , dest = 'rna_min_alt_freq' , help = 'The ALT allele '
1123
1126
'frequency (as a fraction) in the RNA-Seq below which we will reject the '
1124
1127
'mutation.' , type = float , required = False , default = 0.1 )
1128
+
1125
1129
# OxoG filtering options
1126
1130
parser .add_argument ('--filterOxoG' , dest = 'filter_oxog' , action = 'store_true' , help = 'Filter the '
1127
1131
'calls for OxoG artifacts. This feature requires a tumor dna bam as input.' ,
@@ -1133,19 +1137,38 @@ def run_transgene():
1133
1137
'allele frequency (as a fraction) in the DNA-Seq below which we will flag'
1134
1138
'the mutation as being an OxoG variant.' ,
1135
1139
type = float , required = False , default = 0.1 )
1140
+
1141
+ # Logging
1136
1142
parser .add_argument ('--log_level' , dest = 'log_level' , help = 'The level of logging above which '
1137
1143
'messages should be printed.' , required = False , choices = {'DEBUG' , 'INFO' ,
1138
1144
'WARNING' , 'ERROR' },
1139
1145
default = 'INFO' )
1140
1146
parser .add_argument ('--log_file' , dest = 'logfile' , help = 'A path to a logfile.' , type = str ,
1141
1147
required = False , default = None )
1148
+
1149
+ # Misc
1150
+ parser .add_argument ('--prefix' , dest = 'prefix' , type = str , help = 'Prefix for output file names.' ,
1151
+ required = True )
1152
+ parser .add_argument ('--pep_lens' , dest = 'pep_lens' , type = str , help = 'Desired peptide lengths to '
1153
+ 'process. The argument should be in the form of comma separated values. '
1154
+ 'E.g. 9,15' , required = False , default = '9,10,15' )
1155
+ parser .add_argument ('--no_json_dumps' , action = 'store_true' ,
1156
+ help = 'Do not educe peptide fasta record names in the output by dumping the '
1157
+ 'mapping info into a .map json file.' , required = False , default = False )
1158
+ parser .add_argument ('--cores' , dest = 'cores' , type = int ,
1159
+ help = 'Number of cores to use for the filtering step.' , required = False ,
1160
+ default = 1 )
1142
1161
params = parser .parse_args ()
1143
1162
1144
1163
if params .snpeff_file and not params .peptide_file :
1145
1164
raise ValueError ('VCF file requires GENCODE translation FASTA file' )
1146
1165
1147
1166
if params .fusion_file and not params .transcript_file :
1148
- raise ValueError ('Fusion file requires GENCODE transcripts FASTA file' )
1167
+ raise ValueError ('Fusion calling requires GENCODE transcripts FASTA file' )
1168
+ if params .fusion_file and not params .annotation_file :
1169
+ raise ValueError ('Fusion calling requires GENCODE gtf annotation file' )
1170
+ if params .fusion_file and not params .genome_file :
1171
+ raise ValueError ('Fusion calling requires genomic fasta' )
1149
1172
1150
1173
if params .filter_oxog :
1151
1174
if not params .dna_file :
0 commit comments