26
26
import sys
27
27
28
28
from common import read_fasta , chrom_sort
29
- from fusion import get_transcriptome_data , read_fusions , insert_fusions , get_exons
29
+ from fusion import (get_exons ,
30
+ get_transcriptome_data ,
31
+ insert_fusions ,
32
+ read_fusions ,
33
+ read_genes_from_gtf )
30
34
31
35
32
36
# A named tuple describing the result of running reject_mutation on a mutation.
@@ -1036,15 +1040,17 @@ def main(params):
1036
1040
transcriptome , gene_transcript_ids = None , None
1037
1041
1038
1042
# Load data from fusion file
1043
+ fusions = exons = None
1039
1044
if params .fusion_file :
1040
- fusions = read_fusions (params .fusion_file )
1041
- else :
1042
- fusions = None
1043
-
1044
- if params .genome_file and params .annotation_file :
1045
- exons = get_exons (params .genome_file , params .annotation_file )
1046
- else :
1047
- exons = None
1045
+ gene_annotations = read_genes_from_gtf (params .annotation_file )
1046
+ out_bedpe = open ('_' .join ([params .prefix , 'transgened.bedpe' ]), 'w' )
1047
+ try :
1048
+ fusions = read_fusions (params .fusion_file , gene_annotations , params .filter_mt ,
1049
+ params .filter_ig , params .filter_rg , params .filter_rt ,
1050
+ params .rt_threshold , out_bedpe )
1051
+ finally :
1052
+ out_bedpe .close ()
1053
+ exons = get_exons (params .genome_file , params .annotation_file , fusions )
1048
1054
1049
1055
for peplen in params .pep_lens .split (',' ):
1050
1056
logging .info ('Processing %s-mers' , peplen )
@@ -1057,7 +1063,8 @@ def main(params):
1057
1063
if proteome_data and snvs :
1058
1064
insert_snvs (proteome_data , snvs , tumfile , normfile , int (peplen ), params .rna_file )
1059
1065
if transcriptome and gene_transcript_ids and fusions :
1060
- insert_fusions (transcriptome , fusions , gene_transcript_ids , int (peplen ), tumfile , exons = exons )
1066
+ insert_fusions (transcriptome , fusions , gene_transcript_ids , int (peplen ), tumfile ,
1067
+ exons = exons )
1061
1068
1062
1069
if params .no_json_dumps :
1063
1070
shutil .move (tumfile_path , '_' .join ([params .prefix , 'tumor' , peplen ,
@@ -1080,36 +1087,36 @@ def run_transgene():
1080
1087
This will try to run transgene from system arguments
1081
1088
"""
1082
1089
parser = argparse .ArgumentParser (description = main .__doc__ )
1083
- parser . add_argument ( '--peptides' , dest = 'peptide_file' ,
1084
- type = argparse .FileType ('r' ),
1090
+ # SNV related options
1091
+ parser . add_argument ( '--peptides' , dest = 'peptide_file' , type = argparse .FileType ('r' ),
1085
1092
help = 'Path to GENCODE translation FASTA file' )
1086
- parser .add_argument ('--transcripts' , dest = 'transcript_file' ,
1087
- type = argparse .FileType ('r' ),
1088
- help = 'Path to GENCODE transcript FASTA file' )
1089
- parser .add_argument ('--snpeff' , dest = 'snpeff_file' ,
1090
- type = argparse .FileType ('r' ),
1093
+ parser .add_argument ('--snpeff' , dest = 'snpeff_file' , type = argparse .FileType ('r' ),
1091
1094
help = 'Path to snpeff file' )
1092
- parser .add_argument ('--cores' , dest = 'cores' , type = int ,
1093
- help = 'Number of cores to use for the filtering step.' , required = False ,
1094
- default = 1 )
1095
- parser .add_argument ('--fusions' , dest = 'fusion_file' ,
1096
- help = 'Path to gene fusion file' ,
1095
+
1096
+ # Fusion related options
1097
+ parser .add_argument ('--fusions' , dest = 'fusion_file' , help = 'Path to gene fusion file' ,
1097
1098
type = argparse .FileType ('r' ))
1099
+ parser .add_argument ('--transcripts' , dest = 'transcript_file' , type = argparse .FileType ('r' ),
1100
+ help = 'Path to GENCODE transcript FASTA file. Required if calling fusions.' )
1098
1101
parser .add_argument ('--genome' , dest = 'genome_file' ,
1099
- help = 'Path to reference genome file' ,
1102
+ help = 'Path to reference genome file, Required if calling fusions. ' ,
1100
1103
type = argparse .FileType ('r' ))
1101
1104
parser .add_argument ('--annotation' , dest = 'annotation_file' ,
1102
- help = 'Path to gencode annotation file' ,
1105
+ help = 'Path to gencode annotation file. Required if calling fusions. ' ,
1103
1106
type = argparse .FileType ('r' ))
1104
- parser .add_argument ('--prefix' , dest = 'prefix' , type = str ,
1105
- help = 'Prefix for output file names' , required = True )
1106
- parser .add_argument ('--pep_lens' , dest = 'pep_lens' , type = str ,
1107
- help = 'Desired peptide lengths to process. '
1108
- 'The argument should be in the form of comma separated values. '
1109
- 'E.g. 9,15' , required = False , default = '9,10,15' )
1110
- parser .add_argument ('--no_json_dumps' , action = 'store_true' ,
1111
- help = 'Do not educe peptide fasta record names in the output by dumping the '
1112
- 'mapping info into a .map json file.' , required = False , default = False )
1107
+ parser .add_argument ('--filter_mt_fusions' , dest = 'filter_mt' , action = 'store_true' ,
1108
+ help = 'Filter fusions involving Mitochondrial genes.' , required = False )
1109
+ parser .add_argument ('--filter_ig_pairs' , dest = 'filter_ig' , action = 'store_true' ,
1110
+ help = 'Filter fusions involving two immunoglobulin genes (IGXXX).' ,
1111
+ required = False )
1112
+ parser .add_argument ('--filter_rna_gene_fusions' , dest = 'filter_rg' , action = 'store_true' ,
1113
+ help = 'Filter fusions involving RNA genes (RP11-XXXX).' , required = False )
1114
+ parser .add_argument ('--filter_readthroughs' , dest = 'filter_rt' , action = 'store_true' ,
1115
+ help = 'Filter transcriptional read-troughs.' , required = False )
1116
+ parser .add_argument ('--readthrough_threshold' , dest = 'rt_threshold' , type = int ,
1117
+ help = 'Genomic distance between candidates on the same strand below which a '
1118
+ 'fusion will be considered a read-through.' , default = 500000 , required = False )
1119
+
1113
1120
# RNA-Aware options
1114
1121
parser .add_argument ('--rna_file' , dest = 'rna_file' , help = 'The path to an RNA-seq bam file. If '
1115
1122
'provided, the vcf will be filtered for coding mutations only. The file '
@@ -1122,6 +1129,7 @@ def run_transgene():
1122
1129
parser .add_argument ('--min_rna_alt_freq' , dest = 'rna_min_alt_freq' , help = 'The ALT allele '
1123
1130
'frequency (as a fraction) in the RNA-Seq below which we will reject the '
1124
1131
'mutation.' , type = float , required = False , default = 0.1 )
1132
+
1125
1133
# OxoG filtering options
1126
1134
parser .add_argument ('--filterOxoG' , dest = 'filter_oxog' , action = 'store_true' , help = 'Filter the '
1127
1135
'calls for OxoG artifacts. This feature requires a tumor dna bam as input.' ,
@@ -1133,19 +1141,38 @@ def run_transgene():
1133
1141
'allele frequency (as a fraction) in the DNA-Seq below which we will flag'
1134
1142
'the mutation as being an OxoG variant.' ,
1135
1143
type = float , required = False , default = 0.1 )
1144
+
1145
+ # Logging
1136
1146
parser .add_argument ('--log_level' , dest = 'log_level' , help = 'The level of logging above which '
1137
1147
'messages should be printed.' , required = False , choices = {'DEBUG' , 'INFO' ,
1138
1148
'WARNING' , 'ERROR' },
1139
1149
default = 'INFO' )
1140
1150
parser .add_argument ('--log_file' , dest = 'logfile' , help = 'A path to a logfile.' , type = str ,
1141
1151
required = False , default = None )
1152
+
1153
+ # Misc
1154
+ parser .add_argument ('--prefix' , dest = 'prefix' , type = str , help = 'Prefix for output file names.' ,
1155
+ required = True )
1156
+ parser .add_argument ('--pep_lens' , dest = 'pep_lens' , type = str , help = 'Desired peptide lengths to '
1157
+ 'process. The argument should be in the form of comma separated values. '
1158
+ 'E.g. 9,15' , required = False , default = '9,10,15' )
1159
+ parser .add_argument ('--no_json_dumps' , action = 'store_true' ,
1160
+ help = 'Do not educe peptide fasta record names in the output by dumping the '
1161
+ 'mapping info into a .map json file.' , required = False , default = False )
1162
+ parser .add_argument ('--cores' , dest = 'cores' , type = int ,
1163
+ help = 'Number of cores to use for the filtering step.' , required = False ,
1164
+ default = 1 )
1142
1165
params = parser .parse_args ()
1143
1166
1144
1167
if params .snpeff_file and not params .peptide_file :
1145
1168
raise ValueError ('VCF file requires GENCODE translation FASTA file' )
1146
1169
1147
1170
if params .fusion_file and not params .transcript_file :
1148
- raise ValueError ('Fusion file requires GENCODE transcripts FASTA file' )
1171
+ raise ValueError ('Fusion calling requires GENCODE transcripts FASTA file' )
1172
+ if params .fusion_file and not params .annotation_file :
1173
+ raise ValueError ('Fusion calling requires GENCODE gtf annotation file' )
1174
+ if params .fusion_file and not params .genome_file :
1175
+ raise ValueError ('Fusion calling requires genomic fasta' )
1149
1176
1150
1177
if params .filter_oxog :
1151
1178
if not params .dna_file :
0 commit comments