-
Notifications
You must be signed in to change notification settings - Fork 2
/
topcorr.py
52 lines (37 loc) · 1.56 KB
/
topcorr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import collections
import pandas as pd
import numpy as np
from tmfg_core import *
import networkx as nx
from sklearn.preprocessing import MinMaxScaler
def tmfg(data, method, dataset_name, correlation_type, alpha, step, threshold_mean=True):
    """Build a graph from a pairwise dependency matrix filtered by ``TMFG``.

    A square matrix of pairwise scores between the columns of ``data`` is
    assembled, optionally squared and thresholded, handed to the ``TMFG``
    class (from ``tmfg_core``), and the third value returned by
    ``compute_TMFG()`` is converted into a NetworkX graph.

    Args:
        data: Tabular data accepted by ``pandas.DataFrame`` (and by
            ``MinMaxScaler.fit_transform`` for the blended measure); columns
            are the variables being compared.
        method: ``'pearson'`` or ``'spearman'`` selects that correlation.
            Any other value selects the blended measure
            ``alpha * max(std_i, std_j) + (1 - alpha) * (1 - |spearman_ij|)``,
            where ``std`` is taken on min-max scaled columns. Callers
            presumably pass ``'energy'`` for this branch (see the
            ``correlation_type`` handling) -- TODO confirm.
        dataset_name: Used only when ``step == 'test'`` to locate the
            precomputed matrix ``'{method}_{dataset_name}.csv'`` (the
            blended measure reads ``'spearman_{dataset_name}.csv'``).
        correlation_type: ``'square'`` squares the matrix element-wise,
            except when ``method == 'energy'``. Other values leave it as is.
        alpha: Weight of the standard-deviation term in the blended measure.
        step: ``'cv'`` computes correlations from ``data``; ``'test'`` loads
            them from the CSV file described above.
        threshold_mean: If true, entries below the mean of the matrix are
            set to zero before filtering.

    Returns:
        The NetworkX graph built from the matrix returned by
        ``TMFG.compute_TMFG()``.

    Raises:
        ValueError: If ``step`` is neither ``'cv'`` nor ``'test'``.
    """
    # Fail early and clearly; previously an unknown step surfaced later as an
    # unbound-local error on ``corr``.
    if step not in ('cv', 'test'):
        raise ValueError(f"step must be 'cv' or 'test', got {step!r}")

    def _pairwise(kind):
        # 'cv' derives the matrix from ``data``; 'test' reads the stored one.
        if step == 'cv':
            return pd.DataFrame(data).corr(method=kind).to_numpy()
        return pd.read_csv(f'{kind}_{dataset_name}.csv').to_numpy()

    if method in ('pearson', 'spearman'):
        corr = _pairwise(method)
    else:
        scaled_data = MinMaxScaler().fit_transform(data)
        std = scaled_data.std(axis=0).reshape([-1, 1])
        # Broadcasting a column against its transpose yields the pairwise max.
        sigma_ij = np.maximum(std, std.transpose())
        corr_ij = 1 - np.abs(_pairwise('spearman'))
        corr = alpha * sigma_ij + (1 - alpha) * corr_ij

    # The fill value must be passed by keyword: the second positional
    # parameter of ``nan_to_num`` is ``copy``, not ``nan``.
    corr = np.nan_to_num(corr, nan=0.0)

    if correlation_type == 'square' and method != 'energy':
        weight_corr = np.square(corr)
    else:
        weight_corr = corr

    if threshold_mean:
        weight_corr[weight_corr < weight_corr.mean()] = 0

    model = TMFG(pd.DataFrame(weight_corr))
    _cliques, _seps, JS = model.compute_TMFG()

    # ``from_numpy_matrix`` was removed in NetworkX 3.0; prefer the
    # replacement and fall back only on releases that lack it.
    build_graph = getattr(nx, 'from_numpy_array', None) or nx.from_numpy_matrix
    return build_graph(JS)