"""
USAGE:
cd path/to/SparseSC
python example-code.py
"""
import os
import sys
#sys.path.append(os.path.join(os.getcwd(),"SparseSC"))
import time
import SparseSC as SC
from SparseSC.tests import ge_dgp
import numpy as np
import random
# def setup(N0,N1,T,K,g,gs,bs ): # controls (N0), treated units (N1) , time periods (T), Predictors (K), groups (g), g-scale, b-scale
if __name__ == "__main__":
# ------------------------------------------------------------
# ------------------------------------------------------------
# SECTION 1: GENERATE SOME TOY DATA
# ------------------------------------------------------------
# ------------------------------------------------------------
# CONTROL PARAMETERS
random.seed(12345)
np.random.seed(10101)
# Controls (per group), Treated (per group), pre-intervention time periods, post-intervention time periods
N0,N1,T0,T1 = 5,5,4,4
# Causal Covariates, Confounders , Random Covariates
K,S,R = 7,4,5
# Number of Groups, Scale of the Group Effect, (groups make the time series correlated for reasons other than the observed covariates...)
groups,group_scale = 30,2
# Scale of the causal covariates' effects and of the confounders' effects
beta_scale,confounders_scale = 4,1
X_control, X_treated, Y_pre_control, Y_pre_treated, \
Y_post_control, Y_post_treated = ge_dgp(N0,N1,T0,T1,K,S,R,groups,group_scale,beta_scale,confounders_scale,model= "full")
# JOIN THE TREAT AND CONTROL DATA
X = np.vstack( (X_control, X_treated,) )
Y_pre = np.vstack( (Y_pre_control, Y_pre_treated, ) )
Y_post = np.vstack( (Y_post_control, Y_post_treated,) )
# in the leave-one-out scenario, the pre-treatment outcomes will be part of the covariates
X_and_Y_pre = np.hstack( ( X, Y_pre,) )
X_and_Y_pre_control = np.hstack( ( X_control, Y_pre_control,) )
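# Purely informational sanity check (hedged addition, not in the original script):
# print the shapes of the stacked matrices built above. This assumes ge_dgp returns
# one row per unit (controls stacked above treated) with T0 pre-period and T1
# post-period outcome columns.
print("X:", X.shape, "Y_pre:", Y_pre.shape, "Y_post:", Y_post.shape)
print("X_and_Y_pre:", X_and_Y_pre.shape, "X_and_Y_pre_control:", X_and_Y_pre_control.shape)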
# IDENTIFIERS FOR TREAT AND CONTROL UNITS
# control_units = np.arange( N0 * groups )
# treated_units = np.arange( N1 * groups ) + N0
# ------------------------------------------------------------
# ------------------------------------------------------------
# find default penalties
# ------------------------------------------------------------
# ------------------------------------------------------------
# get starting point for the L2 penalty
w_pen_start_ct = SC.w_pen_guestimate(X_control)
w_pen_start_loo = SC.w_pen_guestimate(X_and_Y_pre_control)
# get the maximum value for the L1 Penalty parameter conditional on the guestimate for the L2 penalty
L1_max_ct = SC.get_max_v_pen(X_control,Y_pre_control,X_treat=X_treated,Y_treat=Y_pre_treated)
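# Informational only (hedged addition): report the starting w_pen guesstimates and
# the maximum v_pen for the treat/control setup computed above.
print("w_pen start (C/T): %s, w_pen start (LOO): %s" % (w_pen_start_ct, w_pen_start_loo))
print("Max v_pen (C/T): %s" % L1_max_ct)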
if False:
L1_max_loo = SC.get_max_v_pen(X_and_Y_pre_control[np.arange(100)],Y_post[np.arange(100)])
print("Max L1 loo %s " % L1_max_loo)
else:
L1_max_loo = float(147975295.9121998)
if False:
"Demonstrate relations between the L1 and L2 penalties"
# get the maximum value for the L1 Penalty parameter conditional on several L2 penalty parameter values
L2_grid = (2.** np.arange(-1,2))
L1_max_loo_grid = SC.get_max_v_pen(X_and_Y_pre,
Y_post,
w_pen = w_pen_start_loo * L2_grid)
L1_max_ct_grid = SC.get_max_v_pen(X_control,
Y_pre_control,
X_treat=X_treated,
Y_treat=Y_pre_treated,
w_pen = w_pen_start_ct * L2_grid)
assert ( L1_max_loo / L1_max_loo_grid == 1/L2_grid).all()
assert ( L1_max_ct / L1_max_ct_grid == 1/L2_grid).all()
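# The asserts above document the relation being demonstrated: when the w_pen passed
# to get_max_v_pen is multiplied by a factor (L2_grid), the resulting maximum v_pen
# is multiplied by the same factor, so the two penalties only need to be tuned up to
# that common scale. (Descriptive comment added for readability; derived from the asserts.)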
# ------------------------------------------------------------
# create a grid of penalties to try
# ------------------------------------------------------------
n_points = 10 # number of points in the grid
grid_type = "log-linear" # Usually the optimal penalties are quite Small
if grid_type == "simple":
# An equally spaced linear grid that does not include 0 or 1
grid = ( 1 + np.arange(n_points) ) / ( 1 + n_points )
elif grid_type == "linear":
# Another equally spaced linear grid
fmax = 1e-3 # highest point in the grid relative to the max penalty
fmin = 1e-5 # lowest point in the grid relative to the max penalty
grid = np.linspace(fmin,fmax,n_points)
elif grid_type == "log-linear":
# An equally spaced grid on the log scale
fmax = 1e-2 # highest point in the grid relative to the max penalty
fmin = 1e-4 # lowest point in the grid relative to the max penalty
grid = np.exp(np.linspace(np.log(fmin),np.log(fmax),n_points))
else:
raise ValueError("Unknown grid type: %s" % grid_type)
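# Informational (hedged addition, not in the original script): show the grid that was
# just constructed, expressed as fractions of the maximum penalty.
print("penalty grid (fractions of the max penalty):", grid)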
# ------------------------------------------------------------
# get the Cross Validation Error over a grid of L1 penalty
# parameters using both (a) Treat/Control and (b) the
# leave-one-out controls only methods
# ------------------------------------------------------------
if False:
print("starting grid scoring for treat / control scenario", grid*L1_max_ct)
grid_scores_ct = SC.CV_score(
X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# if v_pen is a single value, we get a single score; if it's an array of values, we get an array of scores.
v_pen = grid * L1_max_ct,
w_pen = w_pen_start_ct,
# CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
cache = False, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
best_L1_penalty_ct = (grid * L1_max_ct)[np.argmin(grid_scores_ct)]
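# Informational (hedged addition): report the selected grid point and its
# cross-validation error; assumes grid_scores_ct behaves like an array of scores.
print("best C/T v_pen from the grid: %s (CV error: %s)" % (best_L1_penalty_ct, np.min(grid_scores_ct)))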
if False:
print("Starting grid scoring for Controls Only scenario with 5-fold gradient descent", grid*L1_max_loo)
grid_scores_loo = SC.CV_score(
X = X_and_Y_pre_control,
Y = Y_post_control,
# this is what enables the k-fold gradient descent
grad_splits = 5,
random_state = 10101, # random_state for the splitting during k-fold gradient descent
# L1 Penalty. If v_pen is a single value, we get a single score; if it's an array of values, we get an array of scores.
v_pen = grid * L1_max_loo,
# L2 Penalty (float)
w_pen = w_pen_start_loo,
# CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
#cache = True, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# announce each call to `numpy.linalg.solve(A,B)` (the major bottleneck)
verbose = False, # it's kind of obnoxious, but gives a sense of running time per gradient calculation
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
if False:
# even with smaller data, this takes a while.
print("Starting grid scoring for Controls Only scenario with leave-one-out gradient descent", grid*L1_max_loo)
grid_scores_loo = SC.CV_score(
X = X_and_Y_pre_control [np.arange(100)], # limit the amount of time...
Y = Y_post_control [np.arange(100)], # limit the amount of time...
# with `grad_splits = None` (the default behavior) we get leave-one-out gradient descent.
grad_splits = None,
# L1 Penalty. If v_pen is a single value, we get a single score; if it's an array of values, we get an array of scores.
v_pen = grid * L1_max_loo,
# L2 Penalty (float)
w_pen = w_pen_start_loo,
# CACHE THE V MATRIX BETWEEN v_pen PARAMETERS (generally faster, but path dependent)
#cache = True, # False by Default
# Run each of the Cross-validation folds in parallel? Often slower
# for large sample sizes because numpy.linalg.solve() already runs
# in parallel for large matrices
parallel=False,
# announce each call to `numpy.linalg.solve(A,B)` (the major bottleneck)
verbose = False, # it's kind of obnoxious, but gives a sense of running time per gradient calculation
# ANNOUNCE COMPLETION OF EACH ITERATION
progress = True)
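# Hedged addition: select the best LOO v_pen from the grid search above, in the same
# way best_L1_penalty_ct is selected for the treat/control case. The original script
# refers to best_L1_penalty_loo further down without defining it.
best_L1_penalty_loo = (grid * L1_max_loo)[np.argmin(grid_scores_loo)]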
# ---------------------------------------------------------------------------
# Calculate Synthetic Control weights for a fixed pair of penalty parameters
# ---------------------------------------------------------------------------
# This is a two-step process because in principle, we can estimate a
# tensor matrix (which contains the relative weights of the covariates and
# possibly pre-treatment outcomes) in one population, and apply it to
# another population.
best_L1_penalty_ct = float(1908.9329)
# -----------------------------------
# Treat/Control:
# -----------------------------------
V_ct = SC.tensor(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
v_pen = best_L1_penalty_ct,
w_pen = w_pen_start_ct)
SC_weights_ct = SC.weights(X = X_control,
X_treat = X_treated,
V = V_ct,
w_pen = w_pen_start_ct)
Y_post_treated_synthetic_controls_ct = SC_weights_ct.dot(Y_post_control)
ct_prediction_error = Y_post_treated_synthetic_controls_ct - Y_post_treated
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
R_squared_post_ct = 1 - np.power(ct_prediction_error,2).sum() / np.power(null_model_error,2).sum()
print( "C/T: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_ct,))
# -----------------------------------
# Leave-One-Out with Control Only:
# -----------------------------------
if False:
# this takes a while:
V_loo = SC.tensor(
X = X_and_Y_pre [np.arange(100)], # limit the amount of time...
Y = Y_post [np.arange(100)], # limit the amount of time...
v_pen = best_L1_penalty_loo,
w_pen = w_pen_start_loo)
SC_weights_loo = SC.weights(X = X_control,
V = V_loo,
w_pen = w_pen_start_loo)
# in progress...
Y_post_treated_synthetic_controls_loo = SC_weights_loo.dot(Y_post_control)
loo_prediction_error = Y_post_treated_synthetic_controls_loo - Y_post_treated
null_model_error = Y_post_treated - np.mean(Y_pre_treated)
R_squared_post_loo = 1 - np.power(loo_prediction_error,2).sum() / np.power(null_model_error,2).sum()
print( "LOO: Out of Sample post intervention R squared: %0.2f%% " % (100*R_squared_post_loo,))
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# Optimization of the L1 and L2 parameters together (second order)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
from scipy.optimize import differential_evolution
# -----------------------------------------------------------------
# Optimization of the L2 and L1 Penalties Simultaneously, keeping their
# product constant. Heuristically, this has been the most efficient means of
# optimizing the L2 Parameter.
# -----------------------------------------------------------------
if False:
# build the objective function to be minimized
# cache for L2_obj_func
n_calls = [0,]
temp_results =[]
SS = np.power(Y_pre_treated - np.mean(Y_pre_treated),2).sum()
best_L1_penalty_ct = float(1908.9329)
def L1_L2_const_obj_func (x):
n_calls[0] += 1
t1 = time.time();
score = SC.CV_score(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# if v_pen is a single value, we get a single score; if it's an array of values, we get an array of scores.
v_pen = best_L1_penalty_ct * np.exp(x[0]),
w_pen = w_pen_start_ct / np.exp(x[0]),
# suppress the analysis type message
quiet = True)
t2 = time.time();
temp_results.append((n_calls[0],x,score))
print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" % (n_calls[0], t2 - t1, x[0], score, 1 - score / SS ))
#print("calls: %s, time: %0.4f, x0: %0.4f, x1: %0.4f, Cross Validation Error: %s, R-Squared: %s" % (n_calls[0], t2 - t1, x[0], x[1], score, 1 - score / SS ))
return score
# the actual optimization
print("Starting L1-L2 Penalty (product constant) optimization")
results = differential_evolution(L1_L2_const_obj_func, bounds = ((-6,6,),) )
NEW_best_L1_penalty_ct = best_L1_penalty_ct * np.exp(results.x[0])
best_L2_penalty = w_pen_start_ct / np.exp(results.x[0])
print("DE optimized L1 penalty: %s, DE optimized L2 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,) )
# -----------------------------------------------------------------
# Optimization of the L2 Parameter alone
# -----------------------------------------------------------------
if False:
# -----------------------
# build the objective function to be minimized
# -----------------------
# OBJECTIVE FUNCTION(S) TO MINIMIZE USING DE
# cache for L2_obj_func
n_calls = [0,]
temp_results =[]
SS = np.power(Y_pre_treated - np.mean(Y_pre_treated),2).sum()
best_L1_penalty_ct = float(1908.9329)
def L2_obj_func (x):
n_calls[0] += 1
t1 = time.time();
score = SC.CV_score(X = X_control,
Y = Y_pre_control,
X_treat = X_treated,
Y_treat = Y_pre_treated,
# if v_pen is a single value, we get a single score; if it's an array of values, we get an array of scores.
v_pen = best_L1_penalty_ct,
w_pen = w_pen_start_ct * np.exp(x[0]),
# suppress the analysis type message
quiet = True)
t2 = time.time();
temp_results.append((n_calls[0],x,score))
print("calls: %s, time: %0.4f, x0: %0.4f, Cross Validation Error: %s, out-of-sample R-Squared: %s" %
(n_calls[0], t2 - t1, x[0], score, 1 - score / SS ))
return score
# the actual optimization
print("Starting L2 Penalty optimization")
results = differential_evolution(L2_obj_func, bounds = ((-6,6,),))
NEW_L2_pen_start_ct = w_pen_start_ct * np.exp(results.x[0])
print("DE optimized L2 Penalty: %s, using fixed L1 penalty: %s" % (NEW_L2_pen_start_ct, best_L1_penalty_ct,) )
# -----------------------------------------------------------------
# Optimization of the L2 and L1 Penalties Simultaneously
# -----------------------------------------------------------------
best_L1_penalty_ct = float(1908.9329)
# the actual optimization
print("Starting L1-L2 Joint Penalty optimization")
results = SC.joint_penalty_optimzation(X = X_control, Y = Y_pre_control, L1_pen_start = best_L1_penalty_ct, L2_pen_start = w_pen_start_ct, bounds = ((-6,6,),)*2, X_treat = X_treated, Y_treat = Y_pre_treated)
NEW_best_L1_penalty_ct = results.x[0]
best_L2_penalty = results.x[1]
print("DE optimized L1 penalty: %s, DE optimized L2 penalty: %s" % (NEW_best_L1_penalty_ct, best_L2_penalty,) )