This repository was archived by the owner on Mar 21, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanc_dp.py
103 lines (100 loc) · 3.33 KB
/
anc_dp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import sys
import numpy as np
import math
import scipy.linalg as la
import dendropy as dp
import tree_reader
import aln_reader
"""
sq_change which produces the same results as ML without SE
"""
def calc_cont_anc_states(tree):
df = 0
nodenum = {}
count = 0
for k in tree.postorder_edge_iter():
i = k.head_node
if len(i.child_nodes()) == 0:
i.data['val'] = float(i.data['cont_values'][0])
i.data['valse'] = float(i.data['cont_values'][0])
else:
nodenum[i] = count
count += 1
df += 1
i.data['val'] = 0.
i.data['valse'] = 0.
df -= 1
#compute the mlest of the root
fullMcp = np.zeros((df+1,df+1))
fullVcp = np.zeros(df+1)
count = 0
for k in tree.postorder_edge_iter():
i = k.head_node
if len(i.child_nodes()) != 0:
nni = nodenum[i]
for j in i.child_nodes():
if j.edge_length == None:
j.edge_length = 0.000000001
tbl = 2./j.edge_length
fullMcp[nni][nni] += tbl;
if len(j.child_nodes()) == 0:
fullVcp[nni] += (j.data['val'] * tbl)
else:
nnj = nodenum[j]
fullMcp[nni][nnj] -= tbl;
fullMcp[nnj][nni] -= tbl;
fullMcp[nnj][nnj] += tbl;
count += 1
b = la.cho_factor(fullMcp)
#these are the ML estimates for the ancestral states
mle = la.cho_solve(b,fullVcp)
sos = 0
for k in tree.postorder_edge_iter():
i = k.head_node
if len(i.child_nodes()) != 0:
i.data['val'] = mle[nodenum[i]]
#print i.data['val']
i.label = str(mle[nodenum[i]])
for j in i.child_nodes():
temp = (i.data['val'] - j.data['val'])
sos += temp*temp / j.edge_length
#print "Square Length: ",sos
#calcSE
"""
for i in tree.iternodes(order="postorder"):
if i.istip == False:
qpq = fullMcp[nodenum[i]][nodenum[i]]
tm1 = np.delete(fullMcp,(nodenum[i]),axis=0)
tm = np.delete(tm1,(nodenum[i]),axis=1)
b = cho_factor(tm)
sol = cho_solve(b,tm1[:,nodenum[i]])
tempse = qpq - np.inner(tm1[:,nodenum[i]],sol)
i.data['valse'] = math.sqrt(2*sos/(df*tempse))
"""
return 0
def match_tips_and_cont_values(tree,seqs):
for i in tree:
i.data = {}
if len(i.child_nodes()) == 0:
test = False
for j in seqs:
if i.taxon.label == j.name:
test = True
i.data['cont_values'] = j.cont_values
break
if test == False:
print "can't find "+i.taxon.label+" in cont_values"
return False
if __name__ == "__main__":
if len(sys.argv) != 3:
print "python "+sys.argv[0]+" newick.tre dataasphy"
sys.exit(0)
infile = open(sys.argv[1],"r")
tree = dp.Tree.get(path=sys.argv[1],schema="newick")
infile.close()
seqs = aln_reader.read_phylip_cont_file(sys.argv[2])
match_tips_and_cont_values(tree,seqs)
sqch = calc_cont_anc_states(tree)
outfile = open("contanc_dp.tre","w")
outfile.write(tree.as_string(schema="newick"))
outfile.close()