-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFetch_Siyi.py
142 lines (119 loc) · 3.89 KB
/
Fetch_Siyi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import sqlite3
import pandas as pd
DatabaseName = './Team2_Siyi.db'
connection = sqlite3.connect(DatabaseName)
cur = connection.cursor()
# Fetch a list of whole Gene Name
Genes = """
SELECT DISTINCT EPIC.GeneName
FROM EPIC
WHERE EPIC.GeneName is NOT NULL;
"""
result = cur.execute(Genes).fetchall()
gene_list = []
for i in result:
gene, = i
if ';' in gene:
temp_gene = gene.split(';')
for each in temp_gene:
gene_list.append(each)
else:
gene_list.append(gene)
print('There are', len(gene_list), 'genes in total!')
# with open("gene_list.txt", "w") as output:
# output.write(str(gene_list))
print("Save the gene list named gene_list.txt at the current directory!")
# input a gene name and format the Gene Name
GN = 'DDX31'
GN = "'%" + GN + "%'"
print('Gene ', GN, ' is selected.')
probe_query = """
SELECT EPIC.ProbeName
FROM EPIC
WHERE EPIC.GeneName LIKE {};
"""
# query for Jess
# "SELECT EPIC.ProbeName \
# FROM EPIC \
# WHERE EPIC.GeneName LIKE ?;"
probe_result = cur.execute(probe_query.format(str(GN))).fetchall()
probe_list = []
for i in probe_result:
probe_list.append(i[0])
print('There are ', len(probe_list), 'probes corresponding to the Gene', GN)
with open("probe_list.txt", "w") as output:
output.write(str(probe_list))
print('Please select one of the probe.')
probe = probe_list[1]
probe = "'" + probe + "'"
print('The probe you select is', probe)
# Get the Probe Key for the input Porbe Name
index_CpG = """
SELECT PROBEINFO.ProbeKey
FROM PROBEINFO
WHERE PROBEINFO.ProbeName = {} ;
"""
# query for Jess
# "SELECT PROBEINFO.ProbeKey \
# FROM PROBEINFO \
# WHERE PROBEINFO.ProbeName = ?;"
result = cur.execute(index_CpG.format(probe)).fetchall()
index_CpG = result[0][0]
print('Probe', probe, 'is No.', index_CpG)
# Based on the Porbe Key, run the corresponding query
if index_CpG <= 107899:
print('Go to AAA table')
data_query = """
SELECT PROBEINFO.ProbeName, PHENO.Age, PHENO.Sex, AAAVALUE.Value
FROM PROBEINFO, PHENO, AAAVALUE
WHERE AAAVALUE.ProbeKey = {}
AND PROBEINFO.ProbeKey = {}
AND AAAVALUE.SampleKey = PHENO.SampleKey;
"""
elif 307899 >= index_CpG > 107899:
print('Go to BBB table')
data_query = """
SELECT PROBEINFO.ProbeName, PHENO.Age, PHENO.Sex, BBBVALUE.Value
FROM PROBEINFO, PHENO, BBBVALUE
WHERE BBBVALUE.ProbeKey = {}
AND PROBEINFO.ProbeKey = {}
AND BBBVALUE.SampleKey = PHENO.SampleKey;
"""
elif 507899 >= index_CpG > 307899:
print('Go to CCC table')
data_query = """
SELECT PROBEINFO.ProbeName, PHENO.Age, PHENO.Sex, CCCVALUE.Value
FROM PROBEINFO, PHENO, CCCVALUE
WHERE CCCVALUE.ProbeKey = {}
AND PROBEINFO.ProbeKey = {}
AND CCCVALUE.SampleKey = PHENO.SampleKey;
"""
elif 807899 >= index_CpG > 507899:
print('Go to DDD table')
data_query = """
SELECT PROBEINFO.ProbeName, PHENO.Age, PHENO.Sex, DDDVALUE.Value
FROM PROBEINFO, PHENO, DDDVALUE
WHERE DDDVALUE.ProbeKey = {}
AND PROBEINFO.ProbeKey = {}
AND DDDVALUE.SampleKey = PHENO.SampleKey;
"""
data_result = cur.execute(data_query.format(index_CpG, index_CpG)).fetchall()
# save the result here
CpG_list = []
Age_list = []
Value_list = []
Sex_list = []
for i in data_result:
CpG, Age, Sex, Value = i
CpG_list.append(CpG)
Age_list.append(Age)
Value_list.append(Value)
Sex_list.append(Sex)
result_df = pd.DataFrame(list(zip(CpG_list, Age_list, Value_list, Sex_list)))
result_df.columns = ['CpG', 'Age', 'Value', 'Sex']
print(result_df)
# result_df.to_csv('./result_Siyi.csv', index=False)
print("Save the result as csv file at the current directory!")
cur.close()
connection.commit()
connection.close()