-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcivicdb.py
More file actions
172 lines (152 loc) · 7.4 KB
/
civicdb.py
File metadata and controls
172 lines (152 loc) · 7.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
import csv
# Locations of the CIViC nightly export files. The paths are relative, so they
# resolve against the working directory of the running process.
# The five TSV summaries are opened by the CivicDb loaders.
assertionPath = "static/civicdb/nightly-AssertionSummaries.tsv"
clinicalEvidencePath = "static/civicdb/nightly-ClinicalEvidenceSummaries.tsv"
genePath = "static/civicdb/nightly-GeneSummaries.tsv"
variantGroupPath = "static/civicdb/nightly-VariantGroupSummaries.tsv"
variantPath = "static/civicdb/nightly-VariantSummaries.tsv"
# NOTE(review): civicVcfPath is not referenced anywhere in the visible code.
civicVcfPath = "static/civicdb/nightly-civic_accepted_and_submitted.vcf"
class CivicDb:  # GRCh37 (Ensembl v75)
    """In-memory lookup over the CIViC nightly TSV summary exports.

    Every table is read once, at construction time, into a dict keyed by the
    row's id column. Ids and all other cell values are kept as the strings
    read from the file; each row is a dict mapping column name to value.
    """

    def __init__(self):
        """Load all five summary tables from their module-level paths."""
        self.assertions = {}
        self.clinicalEvidences = {}
        self.genes = {}
        self.variantGroups = {}
        self.variants = {}
        self._constructAssertions()
        self._constructClinicalEvidences()
        self._constructGenes()
        self._constructVariantGroups()
        self._constructVariants()

    @staticmethod
    def _loadTsv(path, idColumn):
        """Read the tab-separated file at *path* and return its rows keyed by *idColumn*.

        Raises KeyError when the file has no *idColumn* column and OSError when
        the file cannot be opened.
        """
        # newline="" lets the csv module do its own line-ending handling, as its
        # documentation requires. The explicit encoding keeps parsing
        # independent of the platform default (assumes the exports are UTF-8 --
        # TODO confirm).
        with open(path, mode="r", encoding="utf-8", newline="") as tsvFile:
            reader = csv.DictReader(tsvFile, delimiter="\t")
            return {row[idColumn]: row for row in reader}

    @staticmethod
    def _spanContains(row, chromKey, startKey, stopKey, chromosome, location):
        """Return True when *location* lies in the inclusive span named by the given columns."""
        if row.get(chromKey) != chromosome:
            return False
        try:
            start = int(row[startKey])
            stop = int(row[stopKey])
        except (KeyError, TypeError, ValueError):
            # Missing, None, blank or non-numeric coordinates: the row simply
            # has no usable span, which must not abort the whole search.
            return False
        return start <= location <= stop

    def _constructAssertions(self):
        """Load the assertion summaries, keyed by ``assertion_id``.

        Columns: gene entrez_id variant disease doid phenotypes drugs
        assertion_type assertion_direction clinical_significance acmg_codes
        amp_category nccn_guideline nccn_guideline_version regulatory_approval
        fda_companion_test assertion_summary assertion_description assertion_id
        evidence_item_ids variant_id gene_id last_review_date
        assertion_civic_url evidence_items_civic_url variant_civic_url
        gene_civic_url
        """
        self.assertions = self._loadTsv(assertionPath, "assertion_id")

    def _constructClinicalEvidences(self):
        """Load the clinical evidence summaries, keyed by ``evidence_id``.

        Columns: gene entrez_id variant disease doid phenotypes drugs
        drug_interaction_type evidence_type evidence_direction evidence_level
        clinical_significance evidence_statement citation_id source_type
        asco_abstract_id citation nct_ids rating evidence_status evidence_id
        variant_id gene_id chromosome start stop reference_bases variant_bases
        representative_transcript chromosome2 start2 stop2
        representative_transcript2 ensembl_version reference_build
        variant_summary variant_origin last_review_date evidence_civic_url
        variant_civic_url gene_civic_url
        """
        self.clinicalEvidences = self._loadTsv(clinicalEvidencePath, "evidence_id")

    def _constructGenes(self):
        """Load the gene summaries, keyed by ``gene_id``.

        Columns: gene_id gene_civic_url name entrez_id description
        last_review_date
        """
        self.genes = self._loadTsv(genePath, "gene_id")

    def _constructVariantGroups(self):
        """Load the variant group summaries, keyed by ``variant_group_id``.

        Columns: variant_group_id variant_group_civic_url variant_group
        description last_review_date
        """
        self.variantGroups = self._loadTsv(variantGroupPath, "variant_group_id")

    def _constructVariants(self):
        """Load the variant summaries, keyed by ``variant_id``.

        Columns: variant_id variant_civic_url gene entrez_id variant summary
        variant_groups chromosome start stop reference_bases variant_bases
        representative_transcript ensembl_version reference_build chromosome2
        start2 stop2 representative_transcript2 variant_types hgvs_expressions
        last_review_date civic_variant_evidence_score allele_registry_id
        clinvar_ids variant_aliases assertion_ids assertion_civic_urls
        """
        self.variants = self._loadTsv(variantPath, "variant_id")

    def findVariantsFromLocation(self, chromosome, location, ref=None, alt=None):
        """Return the variant rows whose coordinates cover *location*.

        A row matches when *location* falls inside its primary span
        (chromosome/start/stop) or, failing that, its secondary span
        (chromosome2/start2/stop2). Rows with missing or non-numeric
        coordinates are skipped.

        When both *ref* and *alt* are given and more than one row matched, the
        result is narrowed: a row whose reference_bases/variant_bases equal
        ref/alt is returned alone; otherwise rows that specify both bases (and
        therefore describe a different allele) are dropped, while rows that
        leave either base blank are kept.

        Args:
            chromosome: chromosome name; compared as ``str(chromosome)``.
            location: integer position.
            ref: optional reference bases.
            alt: optional variant bases.

        Returns:
            A list of variant row dicts (possibly empty).
        """
        chromosome = str(chromosome)
        variants = [
            row
            for row in self.variants.values()
            if self._spanContains(row, "chromosome", "start", "stop", chromosome, location)
            or self._spanContains(row, "chromosome2", "start2", "stop2", chromosome, location)
        ]
        if ref is None or alt is None:
            return variants
        # A single positional hit is returned as is, even when its bases differ.
        if len(variants) == 1:
            return variants

        remaining = []
        for row in variants:
            if "reference_bases" not in row or "variant_bases" not in row:
                remaining.append(row)
                continue
            rowRef = row["reference_bases"]
            rowAlt = row["variant_bases"]
            if rowRef == ref and rowAlt == alt:
                return [row]
            if rowRef == "" or rowAlt == "":
                remaining.append(row)
        return remaining

    def findGene(self, arg):
        """Return the gene row for a gene name or an Entrez id.

        Args:
            arg: an int, or a str. A str that parses as an integer is treated
                as an Entrez id, any other str as a gene name.

        Returns:
            The matching gene row dict, or an empty dict when nothing matches.

        Raises:
            TypeError: when *arg* is neither a str nor an int.
        """
        # bool is an int subclass but is not a meaningful id.
        if isinstance(arg, bool) or not isinstance(arg, (str, int)):
            raise TypeError("Invalid arguments.")

        column = "entrez_id"
        if isinstance(arg, str):
            try:
                arg = int(arg)
            except ValueError:
                column = "name"
        wanted = arg if column == "name" else str(arg)
        return next(
            (gene for gene in self.genes.values() if gene[column] == wanted),
            {},
        )

    def findGeneFromLocation(self, chromosome, location):
        """Return the gene row of the first variant covering *location*.

        Returns an empty dict when no variant covers the location or when the
        variant's gene is not in the gene table.
        """
        variants = self.findVariantsFromLocation(chromosome, location)
        if not variants:
            return {}
        return self.findGene(variants[0]["gene"])

    def findVariantGroups(self, groups):
        """Return the variant group rows named in *groups*.

        Args:
            groups: comma-separated group names, as found in a variant row's
                ``variant_groups`` column. Names are matched exactly.

        Returns:
            A list of variant group row dicts, ordered by the names in
            *groups*; empty when *groups* is empty or None.
        """
        if not groups:
            return []
        return [
            group
            for name in groups.split(",")
            for group in self.variantGroups.values()
            if group.get("variant_group") == name
        ]

    def findAssertions(self, variantId):
        """Return the assertion rows whose ``variant_id`` equals ``str(variantId)``."""
        wanted = str(variantId)
        return [
            assertion
            for assertion in self.assertions.values()
            if assertion.get("variant_id") == wanted
        ]

    def findClinicalEvidences(self, variantId):
        """Return the clinical evidence rows whose ``variant_id`` equals ``str(variantId)``."""
        wanted = str(variantId)
        return [
            evidence
            for evidence in self.clinicalEvidences.values()
            if evidence.get("variant_id") == wanted
        ]
# Disabled manual usage example, kept as a bare string literal so it never runs.
"""db = CivicDb()
variants = db.findVariantsFromLocation("7", 140453136, "A", "T")
for v in variants:
for value in v.items():
print(value[0], "->", value[1])
print()
print()"""
# variant, representative_transcript, location