-
Notifications
You must be signed in to change notification settings - Fork 10
/
graphfns_Neurohack.py
266 lines (209 loc) · 9.42 KB
/
graphfns_Neurohack.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
#Imports
import numpy as np
import scipy.io as spio
import matplotlib.pyplot as plt
from matplotlib import cm
import pandas as pd
#Functions
def threshold_matrix_density(corrmat,thr,return_thr = False):
""" Function that thresholds a given correlation matrix by density so a set proportion
of edges passes threshold.
Inputs:
corrmat: a square symmetric matrix
thr: density threshold to use
return_thr = return r and density thresholds calculated from data (default = False)
Returns:
corrmat_thr = a thresholded correlation matrix
[optionally also returns r and k density thresholds calculated from the data
Based on a script originally by JD Power
CG edited and converted to python
"""
cshp = corrmat.shape
if cshp[0] != cshp[1]:
raise ValueError("input matrix is not square")
nodnum = cshp[0]
numpossibleedges = int(nodnum * (nodnum-1)/2)
edgesleft = int(np.ceil(thr * numpossibleedges))
# take upper triangle of the matrix
corrmat_tinds = np.triu_indices(nodnum,1)
corrmat_lin = corrmat[corrmat_tinds]
# sort values in the upper triangle and select lowest thr%
sorti = np.argsort(-corrmat_lin) #descending order
keptedges = np.zeros(corrmat_lin.shape)
keptedges[sorti[:edgesleft]] = corrmat_lin[sorti[:edgesleft]]
#reshape into a matrix
corrmat_thr = np.zeros(corrmat.shape)
corrmat_thr[corrmat_tinds] = keptedges
corrmat_thr_sym = corrmat_thr + np.transpose(corrmat_thr) # make it symmetric again
if return_thr:
#some quick checks on where threshold was set
kthr = edgesleft/numpossibleedges
rthr = corrmat_lin[sorti[edgesleft]]
return corrmat_thr, corrmat_thr_sym, kthr, rthr
else:
return corrmat_thr, corrmat_thr_sym
def make_gephi_node_inputfile(Parcel_params,nod_colors=[]):
""" Function to make input csv files needed for gephi for nodes
Inputs:
Parcel_params: a dictionary with information about the nodes in a graph
nod_color: optional input, node x 3 array of RGB values
if not included, will instead use network colors for nodes
Returns:
node_data: a Pandas data frame with information for a gephi csv node file
"""
# some constants we use a lot
nrois = Parcel_params['num_rois']
net_colors = Parcel_params['net_colors']
net_nums = np.unique(Parcel_params['mods_array']) #to deal with numbers being non-sequential
# some basic columns needed for gephi (for now, nothing important to add here)
ID = np.arange(nrois) #np.squeeze(Parcel_params['roi_sort']) + 1 #index from 1
Label = np.zeros(nrois)
Interval = np.zeros(nrois)
Network = np.squeeze(Parcel_params['mods_array'])
Hub1 = np.zeros(nrois)
Hub2 = np.zeros(nrois)
Hub3 = np.zeros(nrois)
# create a color array (one entry for each node)
roi_rgb = [] #colors in a list of rgb strings as inputs for gephi
for roi in range(nrois):
if np.size(nod_colors) > 0:
nod_color = np.squeeze(nod_colors[roi,:])
else:
rind = np.where(net_nums == Parcel_params['mods_array'][roi])
nod_color = net_colors[rind[0][0]]
rgb_str = '%.03f,%.03f,%.03f' %(nod_color[0]*255,nod_color[1]*255,nod_color[2]*255)
roi_rgb.append(rgb_str)
# make this into a Pandas DataFrame
items = np.transpose([ID,Label,Interval,Network,roi_rgb,Hub1,Hub2,Hub3])
node_data = pd.DataFrame(items,columns = ['ID','Label','Interval','Network','Color','Hub1','Hub2','Hub3'])
return node_data
def make_gephi_edge_inputfile(corrmat):
""" Function to make input csv files needed for gephi edges
Inputs:
corrmat: an NxN correlation matrix (can be weighted or unweighted)
should be zeroed under upper tri so edges aren't double counted
but assumes edges are undirected
should be thresholded or output file will be very large
Output:
edge_data: a Panda data frame with information for a gephi edge file
"""
# find edges and store these into lists
edge_inds = np.where(corrmat>0)
nedges = np.size(edge_inds[0])
Source = edge_inds[0]
Target = edge_inds[1]
# fill in some other information about the edges
Type = ['Undirected'] * nedges
Weight = corrmat[edge_inds] # or enter 1 for unweighted?
ID = np.arange(nedges)
Label = np.zeros(nedges)
Interval = np.zeros(nedges)
Hub1 = np.ones(nedges)
Hub2 = np.ones(nedges)
Hub3 = np.ones(nedges)
# make a pandas data frame with all of the information to save into csv
items = np.transpose([Source, Target, Type, ID, Label, Interval, Weight, Hub1, Hub2, Hub3])
edge_data = pd.DataFrame(items,columns = ['Source','Target','Type','ID','Label','Interval','Weight',
'Hub1','Hub2','Hub3'])
return edge_data
def hub_metrics(adj_mat,mod_array):
""" Function for calculating hub metrics
Inputs:
adj_mat: a NxN unweighted symmetric matrix
mod_array: a Nx1 array of community labels for each node
Returns:
deg: an Nx1 array of degree values for each node
WD: an Nx1 array of within module degree for each node
PC: an Nx1 array of participation coefficient for each node
see Guimera & Amaral (2005) Nature for definitions
"""
# some constants/initializations
communities = np.unique(mod_array) # get community IDs
nnod = mod_array.shape[0]
nodes = np.arange(nnod)
WD = np.zeros(nnod) # any nodes not in a community get WD=0
PC = np.zeros(nnod)
# prep input matrix:
# make sure diagonal is zeroed (should be true already)
# and make matrix binarized (below are unweighted calcs)
adj_mat[np.diag_indices(nnod)] = 0
adj_mat = adj_mat > 0
# quick degree calculation
deg = np.nansum(adj_mat,1)
# WD calculation
for comm in communities:
comm_nodes = np.where(mod_array == comm)[0]
comm_mat = adj_mat[:,comm_nodes][comm_nodes,:]
ki = np.nansum(comm_mat,1) # density of connections within module
mean_ks = np.mean(ki) #average across all nodes in a module
sigma_ks = np.std(ki) #std across all nodes in a module
zi = (ki - mean_ks)/sigma_ks #z-score per module
WD[comm_nodes] = zi
# if they're all the same, make them all WD = 0
if sigma_ks == 0:
WD[comm_nodes] = 0
# PC calculation
for nod in nodes:
kis = np.zeros(np.size(communities)) # num links of node i to mod s
for c,comm in enumerate(communities):
# get all nodes in community that are not n
comm_nodes = list(set(np.where(mod_array == comm)[0]) - set([nod]))
node_to_comm = adj_mat[:,nod][comm_nodes]
kis[c] = np.sum(node_to_comm)
PC[nod] = 1 - np.sum((kis/deg[nod])**2)
# deg in mod / total deg : close to 1 if all in one mod
# therefore PC is low if most connections are to one mod
# for disconnected nodes, set PC = 0
if deg[nod] == 0:
PC[nod] = 0
return PC, WD, deg
def figure_hubs(Parcel_params,thresholds,degree,wd,pc):
""" Function for making a plot of hub measures across thresholds.
Inputs:
Parcel_params: a dictionary with information about the ROI choice
thresholds: a 1D array with thresholds that the hub measures were taken at
degree: a 1D array with degree hub measure per node (# of connections to each node)
wd: a 1D array with within module hub measure per node
z-scored measure of the number of connections of a node to its module
pc: a 1D array with participation coefficient measure per node
dispersion measue of proportion of connections node has across mods
Returns:
fig: a figure handle
"""
# for ease:
sorti = np.squeeze(np.transpose(Parcel_params['roi_sort']))
transitions = Parcel_params['transitions']
centers = Parcel_params['centers']
networks = Parcel_params['networks']
hub_measures = {'degree':degree,'within-mod':wd,'part-coeff':pc}
thresh_str_fmt = ['%.2f' % (t) for t in thresholds]
fig, axs = plt.subplots(1,3,figsize=(10,8), constrained_layout = True)
for hm,hubm in enumerate(hub_measures.keys()):
im = axs[hm].imshow(hub_measures[hubm][sorti,:],cmap='magma',aspect='auto')
axs[hm].set_title(hubm)
axs[hm].set(xlabel='thresholds', ylabel = 'networks')
for tr in transitions:
axs[hm].axhline(tr,0,thresholds.size,color='w')
axs[hm].set_yticks(centers)
axs[hm].set_yticklabels(networks)
axs[hm].set_xticks(np.arange(0,thresholds.size,4)) #only plot every fifth
axs[hm].set_xticklabels(thresh_str_fmt[:thresholds.size:4],fontsize='small')
fig.colorbar(im,ax=axs[hm],location='bottom')
for ax in axs.flat:
ax.label_outer()
plt.show()
return fig
def hub_colormap(hubvals):
""" Function to transform hub values into an RGB color map for plotting in a graph
Inputs:
hubvals: a 1D array with hub values per node. Will by default try to fit colormap to range.
Returns:
hub_colors: a nodex3 array with RGB color values per node
"""
hub_colors = np.zeros([hubvals.shape[0],3])
map_vals = cm.get_cmap('jet',101)
map_range = [np.min(hubvals), np.max(hubvals)]
for r in range(hub_colors.shape[0]):
cind = (hubvals[r]-map_range[0])/(map_range[1] - map_range[0])
hub_colors[r,:] = map_vals(cind)[:3]
return hub_colors