-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathbhm_local_search.py
182 lines (144 loc) · 8.05 KB
/
bhm_local_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# -*- coding: utf-8 -*-
"""
@author: Makan Arastuie
"""
import time
import numpy as np
import multiprocessing
from joblib import Parallel, delayed
import generative_model_utils as utils
import bhm_generative_model as bhm
from sklearn.metrics import adjusted_rand_score
from spectral_clustering import regularized_spectral_cluster
import bhm_parameter_estimation as bhm_estimate_utils
def calc_node_neigh_solutions(event_dict, n_classes, duration, node_membership, log_lik_init, node_batch):
"""
Calculates the log-likelihood of neighboring solutions of a batch of nodes by changing their membership. If a higher
log-likelihood was achieved the best solution will be returned, else a tuple of three np.nan is returned.
:param event_dict: Edge dictionary of events between all node pair. Output of the generative models.
:param n_classes: (int) total number of classes
:param duration: (int) Duration of the network
:param node_membership: (list) membership of every node to one of K classes
:param log_lik_init: (float) base log-likelihood of the current solution
:param node_batch: (list) nodes in the current batch
:return: (node index, best class index, log_likelihood)
"""
best_neigh = (np.nan, np.nan, np.nan)
log_lik = log_lik_init
# node_membership = node_membership.copy()
for n_i in node_batch:
n_i_class = node_membership[n_i]
# Adding a constraint to maintain the number of blocks.
if np.sum(node_membership == n_i_class) <= 2:
continue
for c_i in range(n_classes):
if c_i == n_i_class:
continue
# update node_membership temporarily
node_membership[n_i] = c_i
# Eval the aprox log_lik of this neighbor, by est all block parameters.
(neigh_log_lik,
fitted_params) = bhm_estimate_utils.estimate_hawkes_param_and_calc_log_likelihood(event_dict,
node_membership,
duration, n_classes,
False)
# if log_lik if this neighbor is better than the "so far" best neighbor, use this neighbors as the best.
if log_lik < neigh_log_lik:
log_lik = neigh_log_lik
best_neigh = (n_i, c_i, log_lik)
node_membership[n_i] = n_i_class
return best_neigh
def block_local_search(event_dict, n_classes, node_membership_init, duration, max_iter=100, n_cores=-1,
return_fitted_param=False, verbose=False):
"""
Performs local search / hill climbing to increase log-likelihood of the model by switching the community of a single
node at a time.
:param event_dict: Edge dictionary of events between all node pair. Output of the generative models.
:param n_classes: (int) total number of classes
:param node_membership_init: (list) initial membership of every node to one of K classes.
:param duration: (int) Duration of the network.
:param max_iter: (int) maximum number of iterations to be performed by local search.
:param n_cores: (int) number of cores to be used to parallelize the search. If -1, use all available cores.
:param return_fitted_param: if True, return the Hawkes parameters for the model as well.
:param verbose: If True, prints more information on local search.
:return: local optimum node_membership if `return_fitted_param` is false.
"""
n_nodes = len(node_membership_init)
nodes = np.arange(n_nodes)
node_membership = node_membership_init
# estimate initial params of block model and its log-likelihood
(init_log_lik,
fitted_params) = bhm_estimate_utils.estimate_hawkes_param_and_calc_log_likelihood(event_dict, node_membership,
duration, n_classes,
add_com_assig_log_prob=False)
log_lik = init_log_lik
n_cores = n_cores if n_cores > 0 else multiprocessing.cpu_count()
batch_size = np.int(n_nodes / n_cores) + 1
# print(n_cores)
for iter in range(max_iter):
if verbose:
print(f"Iteration {iter}...", end='\r')
tic = time.time()
# for each of the (k-1)*n neighboring solutions
possible_solutions = Parallel(n_jobs=n_cores)(delayed(calc_node_neigh_solutions)
(event_dict, n_classes, duration, node_membership, log_lik,
nodes[batch_size * ii: batch_size * (ii + 1)])
for ii in range(n_cores))
toc = time.time()
print(f"Iter {iter}, took: {(toc - tic)/3600:.3f}h")
possible_solutions = np.array(possible_solutions)
# if all returned log-likelihoods are np.nan, break. You're at a local optima.
if np.all(np.isnan(possible_solutions[:, 2])):
if verbose:
print(f"Local solution found with {iter} iterations.")
break
max_ll_neigh_idx = np.nanargmax(possible_solutions[:, 2])
best_node_to_switch = int(possible_solutions[max_ll_neigh_idx, 0])
best_class_to_switch_to = int(possible_solutions[max_ll_neigh_idx, 1])
# if a good neighbor was found, update best log_lik, and go for the next iteration.
node_membership[best_node_to_switch] = best_class_to_switch_to
(log_lik,
fitted_params) = bhm_estimate_utils.estimate_hawkes_param_and_calc_log_likelihood(event_dict,
node_membership,
duration, n_classes,
False)
if iter == max_iter - 1:
print("Warning: Max iter reached!")
if verbose:
print(f"likelihood went from {init_log_lik:.4f} to {log_lik:.4f}. "
f"{100 * np.abs((log_lik - init_log_lik) / init_log_lik):.2f}% increase.")
if return_fitted_param:
mu, alpha, beta = fitted_params
return node_membership, mu, alpha, beta
return node_membership
# Example of running local search on Block Hawkes model.
if __name__ == '__main__':
seed = None
n_classes = 4
n_nodes = 64
duration = 50
class_probs = np.ones(n_classes) / n_classes
alpha = 0.6
beta = 0.8
mu_diag = 1.6
mu_off_diag = 0.8
bp_alpha = np.ones((n_classes, n_classes), dtype=np.float) * alpha
bp_beta = np.ones((n_classes, n_classes), dtype=np.float) * beta
bp_mu = np.ones((n_classes, n_classes), dtype=np.float) * mu_off_diag
np.fill_diagonal(bp_mu, mu_diag)
true_class_assignments, event_dict = bhm.block_generative_model(n_nodes, class_probs,
bp_mu, bp_alpha, bp_beta,
duration, seed=seed)
true_class_assignments = utils.one_hot_to_class_assignment(true_class_assignments)
binary_adj = utils.event_dict_to_adjacency(n_nodes, event_dict)
spectral_node_membership = regularized_spectral_cluster(binary_adj, num_classes=n_classes)
sc_rand = adjusted_rand_score(true_class_assignments, spectral_node_membership)
print(f"SC Rand index: {sc_rand:.3f}")
print("Parallel")
tic = time.time()
local_search_node_membership = block_local_search(event_dict, n_classes, spectral_node_membership, duration,
max_iter=10, n_cores=34, verbose=True)
toc = time.time()
print(f"local search took {toc - tic:.2f}s.")
sc_rand = adjusted_rand_score(true_class_assignments, local_search_node_membership)
print(f"Local search Rand index: {sc_rand:.3f}")