-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathdataset.py
101 lines (83 loc) · 4.24 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import os
import numpy as np
import pandas as pd
# Class of behavioral measurements
class DataBehavioral(object):
    """Behavioral measurements table read from ``behavioral/hcp.csv``.

    The CSV is loaded once at construction time and indexed by its
    ``Subject`` column; the resulting frame is kept in ``self.df``.
    """

    def __init__(self, sourcedir):
        """Load ``<sourcedir>/behavioral/hcp.csv`` and index it by ``Subject``.

        Args:
            sourcedir: Directory that contains the ``behavioral`` sub-directory.
        """
        super(DataBehavioral, self).__init__()
        self.df = pd.read_csv(os.path.join(sourcedir, 'behavioral', 'hcp.csv')).set_index('Subject')

    def get_feature(self, feature):
        """Return raw values and integer class labels for the requested columns.

        Args:
            feature: A list of column names of ``self.df``. A single column
                name given as a string is treated as a one-element list.

        Returns:
            A pair ``(behavioral_features, behavioral_label)``:

            * ``behavioral_features`` -- ``{column: {subject: value}}``
            * ``behavioral_label`` -- ``{column: {subject: label}}`` where
              ``label`` is the position of the value among the sorted
              distinct values of that column.

        Raises:
            KeyError: If a requested column is not present in ``self.df``.
        """
        # A bare string would otherwise be iterated character by character
        # below, which can never match the column-keyed dictionary.
        if isinstance(feature, str):
            feature = [feature]
        else:
            feature = list(feature)

        behavioral_features = self.df[feature].to_dict()
        behavioral_label = {}
        for f in feature:
            values = behavioral_features[f]
            # Rank of every distinct value in sorted order; a dict lookup
            # replaces the repeated linear ``list.index`` scans.
            # NOTE(review): missing values (NaN) are not handled specially
            # here -- confirm the selected columns are complete.
            rank = {v: i for i, v in enumerate(sorted(set(values.values())))}
            behavioral_label[f] = {k: rank[v] for k, v in values.items()}
        return behavioral_features, behavioral_label  # dict {subject: feature_string} / dict {subject: label}
# Class of nodes, i.e. ROI features
class DataNodes(object):
    """Node (ROI) features: name parts, coordinates and mean time series.

    Attributes set at construction:
        sourcedir: Root data directory, kept for per-subject loading.
        df: Table read from ``roi/7_400.txt`` (tab separated, no header,
            first column used as index).
        features: Column ``1`` of ``df`` split on ``"_"`` into the columns
            ``YeoNetwork``, ``Hemisphere``, ``Network``, ``Region``, ``Index``.
        df_coord: Table read from ``roi/7_400_coord.csv`` without its first
            data row.

    ``df_timeseries`` only exists after the instance has been called with a
    subject identifier.
    """

    def __init__(self, sourcedir):
        """Load the ROI name table and the ROI coordinate table.

        Args:
            sourcedir: Directory containing the ``roi`` and ``timeseries``
                sub-directories.
        """
        super(DataNodes, self).__init__()
        # Stored so that __call__ can locate the per-subject time series.
        self.sourcedir = sourcedir
        self.df = pd.read_csv(os.path.join(sourcedir, 'roi', '7_400.txt'), index_col=0, header=None, delimiter='\t')
        self.features = self.df[1].str.split("_", expand=True)
        self.features.columns = ['YeoNetwork', 'Hemisphere', 'Network', 'Region', 'Index']
        # The first data row is skipped -- presumably a placeholder/background
        # entry; confirm against the coordinate file.
        self.df_coord = pd.read_csv(os.path.join(sourcedir, 'roi', '7_400_coord.csv'), index_col=0)[1:]

        # Names with fewer than five parts leave a hole on the right. Shift
        # the last two populated parts one column right so that ``Index``
        # is filled and ``Region`` repeats ``Network``. The assignment goes
        # through ``.loc`` on the frame itself (not on a row object that may
        # be a copy), and the order matters: ``Index`` must be taken from
        # ``Region`` before ``Region`` is overwritten.
        incomplete = self.features.isnull().any(axis='columns')
        self.features.loc[incomplete, 'Index'] = self.features.loc[incomplete, 'Region'].to_numpy()
        self.features.loc[incomplete, 'Region'] = self.features.loc[incomplete, 'Network'].to_numpy()

    def __call__(self, subject):
        """Load ``timeseries/<subject>.txt`` into ``self.df_timeseries``.

        The file is read as tab separated without header; columns containing
        missing values are dropped and the result is stored as a NumPy array.

        Args:
            subject: Identifier used to build the file name.
        """
        path = os.path.join(self.sourcedir, 'timeseries', f'{subject}.txt')
        table = pd.read_csv(path, index_col=False, header=None, delimiter='\t')
        self.df_timeseries = table.dropna(axis='columns').to_numpy()

    def get_feature(self, type):  # List of 'YeoNetwork', 'Hemisphere', 'Network', 'Region', 'Index'
        """Return node features of the requested kind.

        Args:
            type: One of ``'one_hot'``, ``'coordinate'`` or ``'mean_bold'``.

        Returns:
            * ``'one_hot'`` -- ``(node_features, node_label)`` where
              ``node_features`` maps each ROI to the string
              ``Hemisphere_Region_Network_Index`` and ``node_label`` maps each
              ROI to the order of first appearance of that string.
            * ``'coordinate'`` -- ``(node_label_dict, node_label)`` where
              ``node_label_dict`` is ``{'R'|'A'|'S': {roi: value}}`` and
              ``node_label`` is ``{roi: (R, A, S)}``.
            * ``'mean_bold'`` -- ``(node_label_numpy, node_label)`` where
              ``node_label_numpy`` is the axis-0 mean of
              ``self.df_timeseries`` standardised by its own mean and
              standard deviation, and ``node_label`` maps each position to a
              one-element tuple with that value. Requires a prior call of
              the instance with a subject.

        Raises:
            ValueError: If ``type`` is not one of the supported names.
        """
        feature = ['Hemisphere', 'Region', 'Network', 'Index']
        if type == 'one_hot':
            filtered_features = self.features[feature]
            node_features = filtered_features.apply(lambda x: '_'.join(x), axis='columns').to_dict()
            # Label = order of first appearance; dict lookup instead of a
            # linear ``list.index`` scan per node.
            codes = {}
            for v in node_features.values():
                codes.setdefault(v, len(codes))
            node_label = {k: codes[v] for k, v in node_features.items()}
            return node_features, node_label  # dict {roi: feature_string} / dict {roi: label_value}
        elif type == 'coordinate':
            filtered_features = self.df_coord[['R', 'A', 'S']]
            node_label_dict = filtered_features.to_dict()
            node_label = {
                k: (node_label_dict['R'][k], node_label_dict['A'][k], node_label_dict['S'][k])
                for k in node_label_dict['R']
            }
            return node_label_dict, node_label  # dict {R,A,S:{roi: coordinate}} / dict {roi: tuple (R,A,S) coordinate}
        elif type == 'mean_bold':
            node_label_numpy = np.mean(self.df_timeseries, axis=0)
            # Small epsilon guards against division by zero for constant input.
            node_label_numpy = (node_label_numpy - node_label_numpy.mean()) / (node_label_numpy.std() + 1e-8)
            node_label = {i: (value,) for i, value in enumerate(node_label_numpy)}
            return node_label_numpy, node_label
        else:
            raise ValueError('unknown node feature type')
# Class of edges, i.e. FC features
class DataEdges(object):
    """Edge features: one subject's connectivity matrix and its thresholded graph."""

    def __init__(self, sourcedir):
        """Remember the root data directory; nothing is read yet.

        Args:
            sourcedir: Directory containing the ``connectivity`` sub-directory.
        """
        super().__init__()
        self.sourcedir = sourcedir

    def __call__(self, subject):
        """Load ``connectivity/r<subject>.txt`` into ``self.df`` as a NumPy array.

        The file is read as tab separated without header, and every column
        that contains a missing value is discarded.

        Args:
            subject: Identifier used to build the file name.
        """
        path = os.path.join(self.sourcedir, 'connectivity', f'r{subject}.txt')
        table = pd.read_csv(path, index_col=False, header=None, delimiter='\t')
        self.df = table.dropna(axis='columns').to_numpy()

    def get_adjacency(self, threshold):
        """Binarise ``self.df`` at a percentile and list upper-triangle edges.

        Args:
            threshold: Percentile (as accepted by ``np.percentile``) computed
                over all entries of ``self.df``; entries strictly greater
                than it become 1 in the mask.

        Returns:
            A pair ``(mask, sparse_mask)``: ``mask`` is the ``uint8`` matrix
            of the comparison, and ``sparse_mask`` maps each row index to the
            list of column indices greater than it whose mask entry is set.
            Rows without such an entry have no key.
        """
        cutoff = np.percentile(self.df, threshold)
        mask = (self.df > cutoff).astype(np.uint8)

        sparse_mask = {}
        for src, dst in zip(*np.nonzero(mask)):
            # Keep only pairs above the diagonal so each edge appears once.
            if dst <= src:
                continue
            sparse_mask.setdefault(src, []).append(dst)
        return mask, sparse_mask  # matrix adjacency / dict {roi: neighbor_roi}