-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathproximity.py
115 lines (99 loc) · 3.31 KB
/
proximity.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# caculate proximity between drugs and disease in the interactome network
#
import math
import networkx as nx
from networkx.algorithms.shortest_paths.generic import shortest_path_length
import random
from random import sample
def read_ppin(file_ppin):
ppin = []
with open(file_ppin, 'r') as f:
for line in f:
if '#' not in line:
line = line.strip().split('\t')
node1 = line[0]
node2 = line[1]
ppin.append((node1, node2))
return ppin
def read_disease_genes(file_genes):
geneset = []
with open(file_genes, 'r') as f:
for line in f:
if '#' not in line:
gene = line.strip()
geneset.append(gene)
return geneset
def read_drugs_info(file_drugs):
drug = {}
with open(file_drugs, 'r', encoding='utf-8') as f:
for line in f:
if '#' not in line:
line = line.strip().split('\t')
targets = line[1].split(',')
targets_ = [i.upper() for i in targets if i]
drug[line[0]] = targets_
return drug
def weight(file_weight):
w = {}
with open(file_weight, 'r') as f:
for line in f:
line = line.strip().split('\t')
w[line[0]] = math.log(int(line[1])+1)
return w
def calculate_distance(graph, sourceset, targetset, weight):
res = 0
n = len(sourceset)
for s in sourceset:
ds = []
for t in targetset:
try:
d = shortest_path_length(graph, s, t)
if d == 0: # the target is one of the disease genes
ds.append(d-weight[t])
else: # the target is not a disease gene
ds.append(d)
except:
# for genes that are not in the PPIN, give them a large distance value
ds.append(10000)
res += min(ds)
distance = res/n
return distance
if __name__ == '__main__':
fppin = r'data\ppin.txt'
fdrug = r'data\drugs.txt'
fgene = r'data\ad_genes.txt'
fwt = r'data\weight.txt'
fres = r'data\distance_of_drugs.txt'
# fref = r'data\distance_of_null.txt'
PPI = read_ppin(fppin)
DRUG = read_drugs_info(fdrug)
GSET = read_disease_genes(fgene)
print('Start calculating...')
# construct a protrin-protein network
G = nx.Graph()
G.add_edges_from(PPI)
W = weight(fwt)
RES = {}
for i in DRUG:
print(i)
d = calculate_distance(G, DRUG[i], GSET, W)
RES[i] = d
with open(fres, 'w', encoding='utf-8') as f:
f.write('#Drugs'+'\t'+'Distance'+'\t'+'Num_targets'+'\n')
for k, v in RES.items():
line = str(k)+'\t'+str(v)+'\t'+str(len(DRUG[k]))+'\n'
f.write(line)
# # calculate reference distace
# print('Reference distance...')
# random.seed(667)
# seq = {i: int(random.weibullvariate(1, 0.5))+1 for i in range(10000)}
# DRUG_REF = {i: sample(nodes(G), j) for i, j in seq.items()}
# REF = {}
# for i in DRUG_REF:
# d = calculate_distance(G, DRUG_REF[i], GSET, W)
# REF[i] = d
# with open(fref, 'w', encoding='utf-8') as f:
# for k, v in REF.items():
# line = str(k)+'\t'+str(v)+'\t'+str(len(DRUG_REF[k]))+'\n'
# f.write(line)
# print('Finish!')